Backed out 2 changesets (bug 1151899) for build bustage a=backout

Backed out changeset 7b5270bac41b (bug 1151899)
Backed out changeset 62f38c70d152 (bug 1151899)
This commit is contained in:
Wes Kocher 2016-10-28 14:27:45 -07:00
parent 00d282bd65
commit 90f6b234f3
96 changed files with 0 additions and 696593 deletions

View File

@ -1,2 +0,0 @@
/target
/Cargo.lock

View File

@ -1,19 +0,0 @@
[package]
name = "rust_url_capi"
version = "0.0.1"
authors = ["Valentin Gosu <valentin.gosu@gmail.com>"]
[profile.dev]
opt-level = 3
debug = true
rpath = true
lto = true
[lib]
name = "rust_url_capi"
[dependencies]
libc = "0.2.0"
url = "1.2.1"

View File

@ -1,68 +0,0 @@
use url::ParseError;
/// Conversion of a value into the plain `i32` status code returned across the C API.
///
/// In the implementations in this file, `0` is used for success and non-zero values for errors.
pub trait ErrorCode {
/// Returns the integer status code representing `self`.
fn error_code(&self) -> i32;
}
/// A unit `Result` maps to `0` on success and to the wrapped error's own code on failure.
impl<T: ErrorCode> ErrorCode for Result<(), T> {
    fn error_code(&self) -> i32 {
        // `Ok(())` carries no error, so it reports 0; otherwise delegate to the error value.
        self.as_ref().err().map_or(0, |failure| failure.error_code())
    }
}
/// The unit type always maps to the generic code `-1`.
///
/// Together with the `Result<(), T>` impl this gives `Err(())` the code `-1`.
impl ErrorCode for () {
    fn error_code(&self) -> i32 {
        -1
    }
}
/// Every `ParseError` is currently collapsed into the single generic code `-1`.
impl ErrorCode for ParseError {
    fn error_code(&self) -> i32 {
        // No per-variant codes are produced; callers can only tell "parse failed".
        -1
    }
}
/// Status values reported by the C API; the numeric codes are defined by the
/// `ErrorCode` implementation for this type.
pub enum NSError {
/// The operation succeeded.
OK,
/// An argument was invalid (used in this crate for null pointers).
InvalidArg,
/// The operation failed for an unspecified reason.
Failure,
}
/// Maps each `NSError` to its 32-bit status code.
impl ErrorCode for NSError {
    fn error_code(&self) -> i32 {
        // The failure codes have the high bit set, so they are written as `u32`
        // and reinterpreted bit-for-bit as (negative) `i32` values.
        let raw: u32 = match *self {
            NSError::OK => 0,
            NSError::InvalidArg => 0x8007_0057,
            NSError::Failure => 0x8000_4005,
        };
        raw as i32
    }
}

View File

@ -1,477 +0,0 @@
extern crate url;
use url::{Url, ParseError, ParseOptions};
use url::quirks;
extern crate libc;
use libc::size_t;
use std::mem;
use std::str;
#[allow(non_camel_case_types)]
pub type rusturl_ptr = *const libc::c_void;
mod string_utils;
pub use string_utils::*;
mod error_mapping;
use error_mapping::*;
/// Returns the `ParseOptions` used for every parse in this crate (currently `Url::options()`
/// without further customisation).
fn parser<'a>() -> ParseOptions<'a> {
Url::options()
}
/// Looks up the port this crate treats as the default for `scheme`.
///
/// Returns `None` for schemes that are not in the table. The comparison is an
/// exact string match on the scheme name.
fn default_port(scheme: &str) -> Option<u32> {
    const KNOWN_PORTS: [(&'static str, u32); 9] = [
        ("ftp", 21),
        ("gopher", 70),
        ("http", 80),
        ("https", 443),
        ("ws", 80),
        ("wss", 443),
        ("rtsp", 443),
        ("moz-anno", 443),
        ("android", 443),
    ];
    KNOWN_PORTS
        .iter()
        .find(|entry| entry.0 == scheme)
        .map(|entry| entry.1)
}
/// Parses `len` bytes starting at `spec` as a URL and returns an owning handle.
///
/// Returns a null pointer when `spec` is null, when the bytes are not valid
/// UTF-8, or when they do not parse as a URL. A non-null result must be
/// released with `rusturl_free`.
///
/// # Safety
///
/// If `spec` is non-null it must point to at least `len` readable bytes.
#[no_mangle]
pub unsafe extern "C" fn rusturl_new(spec: *mut libc::c_char, len: size_t) -> rusturl_ptr {
    // `slice::from_raw_parts` requires a non-null pointer even for a zero
    // length, so reject null up front instead of invoking undefined behavior.
    if spec.is_null() {
        return std::ptr::null();
    }
    let slice = std::slice::from_raw_parts(spec as *const libc::c_uchar, len as usize);
    let url_spec = match str::from_utf8(slice) {
        Ok(spec) => spec,
        Err(_) => return std::ptr::null(),
    };
    match parser().parse(url_spec) {
        // Ownership moves to the caller; `rusturl_free` rebuilds the `Box`.
        Ok(url) => Box::into_raw(Box::new(url)) as rusturl_ptr,
        Err(_) => std::ptr::null(),
    }
}
#[no_mangle]
pub unsafe extern "C" fn rusturl_free(urlptr: rusturl_ptr) {
if urlptr.is_null() {
return ();
}
let url: Box<Url> = Box::from_raw(urlptr as *mut url::Url);
drop(url);
}
/// Writes the full serialization of the URL into the string container `cont`.
///
/// Returns the `InvalidArg` code when `urlptr` is null, otherwise the result
/// of assigning to `cont`.
#[no_mangle]
pub unsafe extern "C" fn rusturl_get_spec(urlptr: rusturl_ptr, cont: *mut libc::c_void) -> i32 {
    match (urlptr as *const Url).as_ref() {
        Some(url) => cont.assign(&url.to_string()),
        None => NSError::InvalidArg.error_code(),
    }
}
/// Writes the URL's scheme into the string container `cont`.
///
/// Returns the `InvalidArg` code when `urlptr` is null, otherwise the result
/// of assigning to `cont`.
#[no_mangle]
pub unsafe extern "C" fn rusturl_get_scheme(urlptr: rusturl_ptr, cont: *mut libc::c_void) -> i32 {
    match (urlptr as *const Url).as_ref() {
        Some(url) => cont.assign(url.scheme()),
        None => NSError::InvalidArg.error_code(),
    }
}
/// Writes the URL's username into `cont`; cannot-be-a-base URLs yield an empty string.
///
/// Returns the `InvalidArg` code when `urlptr` is null.
#[no_mangle]
pub unsafe extern "C" fn rusturl_get_username(urlptr: rusturl_ptr, cont: *mut libc::c_void) -> i32 {
    let url = match (urlptr as *const Url).as_ref() {
        Some(url) => url,
        None => return NSError::InvalidArg.error_code(),
    };
    if url.cannot_be_a_base() {
        return cont.set_size(0);
    }
    cont.assign(url.username())
}
/// Writes the URL's password into `cont`, or empties `cont` when there is none.
///
/// Returns the `InvalidArg` code when `urlptr` is null.
#[no_mangle]
pub unsafe extern "C" fn rusturl_get_password(urlptr: rusturl_ptr, cont: *mut libc::c_void) -> i32 {
    let url = match (urlptr as *const Url).as_ref() {
        Some(url) => url,
        None => return NSError::InvalidArg.error_code(),
    };
    if let Some(password) = url.password() {
        cont.assign(password)
    } else {
        cont.set_size(0)
    }
}
/// Writes the textual form of the URL's host into `cont`, or empties `cont`
/// when the URL has no host.
///
/// Returns the `InvalidArg` code when `urlptr` is null.
#[no_mangle]
pub unsafe extern "C" fn rusturl_get_host(urlptr: rusturl_ptr, cont: *mut libc::c_void) -> i32 {
    let url = match (urlptr as *const Url).as_ref() {
        Some(url) => url,
        None => return NSError::InvalidArg.error_code(),
    };
    if let Some(host) = url.host() {
        let rendered = host.to_string();
        cont.assign(&rendered)
    } else {
        cont.set_size(0)
    }
}
/// Returns the URL's explicit port, or `-1` when `url.port()` reports none.
///
/// A null `urlptr` yields the `InvalidArg` code instead.
#[no_mangle]
pub unsafe extern "C" fn rusturl_get_port(urlptr: rusturl_ptr) -> i32 {
    match (urlptr as *const Url).as_ref() {
        Some(url) => url.port().map_or(-1, |port| port as i32),
        None => NSError::InvalidArg.error_code(),
    }
}
/// Writes the URL's path into `cont`; cannot-be-a-base URLs yield an empty string.
///
/// Returns the `InvalidArg` code when `urlptr` is null.
#[no_mangle]
pub unsafe extern "C" fn rusturl_get_path(urlptr: rusturl_ptr, cont: *mut libc::c_void) -> i32 {
    let url = match (urlptr as *const Url).as_ref() {
        Some(url) => url,
        None => return NSError::InvalidArg.error_code(),
    };
    if url.cannot_be_a_base() {
        return cont.set_size(0);
    }
    cont.assign(url.path())
}
/// Writes the URL's query string into `cont`, or empties `cont` when there is none.
///
/// Returns the `InvalidArg` code when `urlptr` is null.
#[no_mangle]
pub unsafe extern "C" fn rusturl_get_query(urlptr: rusturl_ptr, cont: *mut libc::c_void) -> i32 {
    let url = match (urlptr as *const Url).as_ref() {
        Some(url) => url,
        None => return NSError::InvalidArg.error_code(),
    };
    if let Some(query) = url.query() {
        cont.assign(query)
    } else {
        cont.set_size(0)
    }
}
/// Writes the URL's fragment into `cont`, or empties `cont` when there is none.
///
/// Returns the `InvalidArg` code when `urlptr` is null.
#[no_mangle]
pub unsafe extern "C" fn rusturl_get_fragment(urlptr: rusturl_ptr, cont: *mut libc::c_void) -> i32 {
    let url = match (urlptr as *const Url).as_ref() {
        Some(url) => url,
        None => return NSError::InvalidArg.error_code(),
    };
    if let Some(fragment) = url.fragment() {
        cont.assign(fragment)
    } else {
        cont.set_size(0)
    }
}
/// Returns `1` when the URL has a fragment component and `0` when it has none.
///
/// A null `urlptr` yields the `InvalidArg` code instead.
#[no_mangle]
pub unsafe extern "C" fn rusturl_has_fragment(urlptr: rusturl_ptr) -> i32 {
    let url = match (urlptr as *const Url).as_ref() {
        Some(url) => url,
        None => return NSError::InvalidArg.error_code(),
    };
    if url.fragment().is_some() { 1 } else { 0 }
}
/// Replaces the URL's scheme with the `len` bytes at `scheme`, via `quirks::set_protocol`.
///
/// Returns the `InvalidArg` code for a null `urlptr`, the `ParseError` code
/// when the bytes are not valid UTF-8, and otherwise the setter's status code.
#[no_mangle]
pub unsafe extern "C" fn rusturl_set_scheme(urlptr: rusturl_ptr, scheme: *mut libc::c_char, len: size_t) -> i32 {
    let url = match (urlptr as *mut Url).as_mut() {
        Some(url) => url,
        None => return NSError::InvalidArg.error_code(),
    };
    let bytes = std::slice::from_raw_parts(scheme as *const u8, len);
    match str::from_utf8(bytes) {
        Ok(text) => quirks::set_protocol(url, text).error_code(),
        // The input was not valid UTF-8.
        Err(_) => ParseError::InvalidDomainCharacter.error_code(),
    }
}
/// Replaces the URL's username with the `len` bytes at `username`, via `quirks::set_username`.
///
/// Returns the `InvalidArg` code for a null `urlptr`, the `ParseError` code
/// when the bytes are not valid UTF-8, and otherwise the setter's status code.
#[no_mangle]
pub unsafe extern "C" fn rusturl_set_username(urlptr: rusturl_ptr, username: *mut libc::c_char, len: size_t) -> i32 {
    let url = match (urlptr as *mut Url).as_mut() {
        Some(url) => url,
        None => return NSError::InvalidArg.error_code(),
    };
    let bytes = std::slice::from_raw_parts(username as *const u8, len);
    match str::from_utf8(bytes) {
        Ok(text) => quirks::set_username(url, text).error_code(),
        // The input was not valid UTF-8.
        Err(_) => ParseError::InvalidDomainCharacter.error_code(),
    }
}
/// Replaces the URL's password with the `len` bytes at `password`, via `quirks::set_password`.
///
/// Returns the `InvalidArg` code for a null `urlptr`, the `ParseError` code
/// when the bytes are not valid UTF-8, and otherwise the setter's status code.
#[no_mangle]
pub unsafe extern "C" fn rusturl_set_password(urlptr: rusturl_ptr, password: *mut libc::c_char, len: size_t) -> i32 {
    let url = match (urlptr as *mut Url).as_mut() {
        Some(url) => url,
        None => return NSError::InvalidArg.error_code(),
    };
    let bytes = std::slice::from_raw_parts(password as *const u8, len);
    match str::from_utf8(bytes) {
        Ok(text) => quirks::set_password(url, text).error_code(),
        // The input was not valid UTF-8.
        Err(_) => ParseError::InvalidDomainCharacter.error_code(),
    }
}
/// Replaces host (and optional port) with the `len` bytes at `host_and_port`.
///
/// This forwards to `quirks::set_host`; the host-only variant is `rusturl_set_host`.
/// Returns the `InvalidArg` code for a null `urlptr`, the `ParseError` code
/// when the bytes are not valid UTF-8, and otherwise the setter's status code.
#[no_mangle]
pub unsafe extern "C" fn rusturl_set_host_and_port(urlptr: rusturl_ptr, host_and_port: *mut libc::c_char, len: size_t) -> i32 {
    let url = match (urlptr as *mut Url).as_mut() {
        Some(url) => url,
        None => return NSError::InvalidArg.error_code(),
    };
    let bytes = std::slice::from_raw_parts(host_and_port as *const u8, len);
    match str::from_utf8(bytes) {
        Ok(text) => quirks::set_host(url, text).error_code(),
        // The input was not valid UTF-8.
        Err(_) => ParseError::InvalidDomainCharacter.error_code(),
    }
}
/// Replaces the URL's host name with the `len` bytes at `host`.
///
/// This forwards to `quirks::set_hostname`; use `rusturl_set_host_and_port`
/// to set host and port together.
/// Returns the `InvalidArg` code for a null `urlptr`, the `ParseError` code
/// when the bytes are not valid UTF-8, and otherwise the setter's status code.
#[no_mangle]
pub unsafe extern "C" fn rusturl_set_host(urlptr: rusturl_ptr, host: *mut libc::c_char, len: size_t) -> i32 {
    let url = match (urlptr as *mut Url).as_mut() {
        Some(url) => url,
        None => return NSError::InvalidArg.error_code(),
    };
    let bytes = std::slice::from_raw_parts(host as *const u8, len);
    match str::from_utf8(bytes) {
        Ok(text) => quirks::set_hostname(url, text).error_code(),
        // The input was not valid UTF-8.
        Err(_) => ParseError::InvalidDomainCharacter.error_code(),
    }
}
/// Sets the URL's port from its textual form (the `len` bytes at `port`), via `quirks::set_port`.
///
/// Returns the `InvalidArg` code for a null `urlptr`, the `ParseError` code
/// when the bytes are not valid UTF-8, and otherwise the setter's status code.
#[no_mangle]
pub unsafe extern "C" fn rusturl_set_port(urlptr: rusturl_ptr, port: *mut libc::c_char, len: size_t) -> i32 {
    let url = match (urlptr as *mut Url).as_mut() {
        Some(url) => url,
        None => return NSError::InvalidArg.error_code(),
    };
    let bytes = std::slice::from_raw_parts(port as *const u8, len);
    match str::from_utf8(bytes) {
        Ok(text) => quirks::set_port(url, text).error_code(),
        // The input was not valid UTF-8.
        Err(_) => ParseError::InvalidDomainCharacter.error_code(),
    }
}
/// Sets the URL's port from an integer.
///
/// * Returns the `InvalidArg` code when `urlptr` is null.
/// * Returns `-100` for cannot-be-a-base URLs and for the `file` scheme.
/// * A port equal to the scheme's `default_port`, a negative port, or a port
///   above `u16::MAX` removes the explicit port.
/// * Any other value is stored as the explicit port.
///
/// The result of `Url::set_port` itself is ignored; the function then reports `OK`.
#[no_mangle]
pub unsafe extern "C" fn rusturl_set_port_no(urlptr: rusturl_ptr, new_port: i32) -> i32 {
    let url = match (urlptr as *mut Url).as_mut() {
        Some(url) => url,
        None => return NSError::InvalidArg.error_code(),
    };
    if url.cannot_be_a_base() || url.scheme() == "file" {
        return -100;
    }
    let is_default = default_port(url.scheme()).map_or(false, |port| new_port == port as i32);
    let out_of_range = new_port < 0 || new_port > std::u16::MAX as i32;
    let explicit_port = if is_default || out_of_range {
        None
    } else {
        Some(new_port as u16)
    };
    let _ = url.set_port(explicit_port);
    NSError::OK.error_code()
}
/// Replaces the URL's path with the `len` bytes at `path`, via `quirks::set_pathname`.
///
/// Returns the `InvalidArg` code for a null `urlptr` and the `ParseError` code
/// when the bytes are not valid UTF-8; otherwise the `ErrorCode` of whatever
/// `quirks::set_pathname` returns.
#[no_mangle]
pub unsafe extern "C" fn rusturl_set_path(urlptr: rusturl_ptr, path: *mut libc::c_char, len: size_t) -> i32 {
    let url = match (urlptr as *mut Url).as_mut() {
        Some(url) => url,
        None => return NSError::InvalidArg.error_code(),
    };
    let bytes = std::slice::from_raw_parts(path as *const u8, len);
    match str::from_utf8(bytes) {
        // NOTE(review): if `quirks::set_pathname` returns `()` rather than a `Result`,
        // this resolves to the `()` impl of `ErrorCode` - confirm the resulting code.
        Ok(text) => quirks::set_pathname(url, text).error_code(),
        // The input was not valid UTF-8.
        Err(_) => ParseError::InvalidDomainCharacter.error_code(),
    }
}
/// Replaces the URL's query with the `len` bytes at `query`, via `quirks::set_search`.
///
/// Returns the `InvalidArg` code for a null `urlptr` and the `ParseError` code
/// when the bytes are not valid UTF-8; otherwise the `ErrorCode` of whatever
/// `quirks::set_search` returns.
#[no_mangle]
pub unsafe extern "C" fn rusturl_set_query(urlptr: rusturl_ptr, query: *mut libc::c_char, len: size_t) -> i32 {
    let url = match (urlptr as *mut Url).as_mut() {
        Some(url) => url,
        None => return NSError::InvalidArg.error_code(),
    };
    let bytes = std::slice::from_raw_parts(query as *const u8, len);
    match str::from_utf8(bytes) {
        // NOTE(review): if `quirks::set_search` returns `()` rather than a `Result`,
        // this resolves to the `()` impl of `ErrorCode` - confirm the resulting code.
        Ok(text) => quirks::set_search(url, text).error_code(),
        // The input was not valid UTF-8.
        Err(_) => ParseError::InvalidDomainCharacter.error_code(),
    }
}
/// Replaces the URL's fragment with the `len` bytes at `fragment`, via `quirks::set_hash`.
///
/// Returns the `InvalidArg` code for a null `urlptr` and the `ParseError` code
/// when the bytes are not valid UTF-8; otherwise the `ErrorCode` of whatever
/// `quirks::set_hash` returns.
#[no_mangle]
pub unsafe extern "C" fn rusturl_set_fragment(urlptr: rusturl_ptr, fragment: *mut libc::c_char, len: size_t) -> i32 {
    let url = match (urlptr as *mut Url).as_mut() {
        Some(url) => url,
        None => return NSError::InvalidArg.error_code(),
    };
    let bytes = std::slice::from_raw_parts(fragment as *const u8, len);
    match str::from_utf8(bytes) {
        // NOTE(review): if `quirks::set_hash` returns `()` rather than a `Result`,
        // this resolves to the `()` impl of `ErrorCode` - confirm the resulting code.
        Ok(text) => quirks::set_hash(url, text).error_code(),
        // The input was not valid UTF-8.
        Err(_) => ParseError::InvalidDomainCharacter.error_code(),
    }
}
/// Resolves the `len` bytes at `resolve` against the URL and writes the
/// resulting absolute URL into `cont`.
///
/// * Null `urlptr`: returns the `InvalidArg` code.
/// * Input that is not valid UTF-8: returns the `Failure` code.
/// * Input that cannot be parsed against the base: `cont` is emptied.
#[no_mangle]
pub unsafe extern "C" fn rusturl_resolve(urlptr: rusturl_ptr, resolve: *mut libc::c_char, len: size_t, cont: *mut libc::c_void) -> i32 {
    let base = match (urlptr as *const Url).as_ref() {
        Some(base) => base,
        None => return NSError::InvalidArg.error_code(),
    };
    let bytes = std::slice::from_raw_parts(resolve as *const u8, len);
    let relative = match str::from_utf8(bytes) {
        Ok(text) => text,
        Err(_) => return NSError::Failure.error_code(),
    };
    match parser().base_url(Some(base)).parse(relative) {
        Ok(resolved) => cont.assign(&resolved.to_string()),
        Err(_) => cont.set_size(0),
    }
}
/// Writes into `cont` a URL made of the parts `urlptr1` and `urlptr2` have in common.
///
/// * Either handle null: returns the `InvalidArg` code.
/// * Equal URLs: the full serialization of the first URL is written.
/// * Different scheme, host, username, password or port, or a URL without
///   path segments: `cont` is emptied.
/// * Otherwise: a copy of the first URL, without query and fragment, whose
///   path consists of the leading path segments shared by both URLs.
#[no_mangle]
pub unsafe extern "C" fn rusturl_common_base_spec(urlptr1: rusturl_ptr, urlptr2: rusturl_ptr, cont: *mut libc::c_void) -> i32 {
    if urlptr1.is_null() || urlptr2.is_null() {
        return NSError::InvalidArg.error_code();
    }
    let url1 = &*(urlptr1 as *const Url);
    let url2 = &*(urlptr2 as *const Url);
    if url1 == url2 {
        return cont.assign(&url1.to_string());
    }
    if url1.scheme() != url2.scheme() ||
       url1.host() != url2.host() ||
       url1.username() != url2.username() ||
       url1.password() != url2.password() ||
       url1.port() != url2.port() {
        return cont.set_size(0);
    }
    let path1 = match url1.path_segments() {
        Some(path) => path,
        None => return cont.set_size(0),
    };
    let path2 = match url2.path_segments() {
        Some(path) => path,
        None => return cont.set_size(0),
    };
    let mut url = url1.clone();
    url.set_query(None);
    // The fragment belongs to `url1` only, so it must not leak into the common base.
    url.set_fragment(None);
    // NOTE(review): kept from the original; the result is ignored and the intent of
    // dropping the host from a base that shares its host is unclear - confirm.
    let _ = url.set_host(None);
    {
        let mut new_segments = match url.path_segments_mut() {
            Ok(segments) => segments,
            Err(_) => return cont.set_size(0),
        };
        // Start from an empty path. Without this the shared segments were appended
        // to url1's complete path (e.g. "/a/b/c" became "/a/b/c/a/b").
        new_segments.clear();
        for (p1, p2) in path1.zip(path2) {
            if p1 != p2 {
                break;
            }
            new_segments.push(p1);
        }
    }
    cont.assign(&url.to_string())
}
/// Writes into `cont` a relative path leading from the first URL to the second.
///
/// * Either handle null: returns the `InvalidArg` code.
/// * Equal URLs: `cont` is emptied.
/// * Different scheme, host, username, password or port, or a URL without
///   path segments: the full serialization of the second URL is written.
/// * Otherwise: one `"../"` per path segment of the first URL that follows the
///   shared leading segments, then each remaining segment of the second URL
///   followed by `"/"`.
#[no_mangle]
pub unsafe extern "C" fn rusturl_relative_spec(urlptr1: rusturl_ptr, urlptr2: rusturl_ptr, cont: *mut libc::c_void) -> i32 {
    if urlptr1.is_null() || urlptr2.is_null() {
        return NSError::InvalidArg.error_code();
    }
    let url1 = &*(urlptr1 as *const Url);
    let url2 = &*(urlptr2 as *const Url);
    if url1 == url2 {
        return cont.set_size(0);
    }
    if url1.scheme() != url2.scheme() ||
       url1.host() != url2.host() ||
       url1.username() != url2.username() ||
       url1.password() != url2.password() ||
       url1.port() != url2.port() {
        return cont.assign(&url2.to_string());
    }
    let mut path1 = match url1.path_segments() {
        Some(path) => path.peekable(),
        None => return cont.assign(&url2.to_string()),
    };
    let mut path2 = match url2.path_segments() {
        Some(path) => path.peekable(),
        None => return cont.assign(&url2.to_string()),
    };
    // TODO: file:// on WIN?
    // Skip the shared leading segments. Peeking (instead of calling `next()` on
    // both iterators first) keeps the first differing segment of each path, and
    // the extra segment of the longer path, available for the output below.
    loop {
        let same = match (path1.peek(), path2.peek()) {
            (Some(p1), Some(p2)) => p1 == p2,
            _ => false,
        };
        if !same {
            break;
        }
        path1.next();
        path2.next();
    }
    // NOTE(review): as before, the last segment of url1 is counted like a directory
    // and every emitted segment gets a trailing "/" - confirm the expected format.
    let mut buffer = String::new();
    for _ in path1 {
        buffer.push_str("../");
    }
    for p2 in path2 {
        buffer.push_str(p2);
        buffer.push_str("/");
    }
    cont.assign(&buffer)
}

View File

@ -1,45 +0,0 @@
/* C interface of the rust_url_capi library. */
#ifndef __RUST_URL_CAPI
#define __RUST_URL_CAPI
#include <stddef.h> /* size_t */
#include <stdint.h> /* int32_t */
#include <stdlib.h>
#ifdef __cplusplus
extern "C" {
#endif
/* Opaque handle to a parsed URL. */
struct rusturl;
typedef struct rusturl* rusturl_ptr;
/* Parses src_len bytes of spec; returns NULL on failure. Release with rusturl_free. */
rusturl_ptr rusturl_new(const char *spec, size_t src_len);
void rusturl_free(rusturl_ptr url);
/* Getters: the void* argument is a string container that receives the value
   through the c_fn_set_size / c_fn_get_buffer callbacks supplied by the host. */
int32_t rusturl_get_spec(rusturl_ptr url, void*);
int32_t rusturl_get_scheme(rusturl_ptr url, void*);
int32_t rusturl_get_username(rusturl_ptr url, void*);
int32_t rusturl_get_password(rusturl_ptr url, void*);
int32_t rusturl_get_host(rusturl_ptr url, void*);
int32_t rusturl_get_port(rusturl_ptr url); // returns port or -1
int32_t rusturl_get_path(rusturl_ptr url, void*);
int32_t rusturl_get_query(rusturl_ptr url, void*);
int32_t rusturl_get_fragment(rusturl_ptr url, void*);
int32_t rusturl_has_fragment(rusturl_ptr url); // 1 true, 0 false, < 0 error
/* Setters: strings are passed as pointer + length and need not be NUL-terminated. */
int32_t rusturl_set_scheme(rusturl_ptr url, const char *scheme, size_t len);
int32_t rusturl_set_username(rusturl_ptr url, const char *user, size_t len);
int32_t rusturl_set_password(rusturl_ptr url, const char *pass, size_t len);
int32_t rusturl_set_host_and_port(rusturl_ptr url, const char *hostport, size_t len);
int32_t rusturl_set_host(rusturl_ptr url, const char *host, size_t len);
int32_t rusturl_set_port(rusturl_ptr url, const char *port, size_t len);
int32_t rusturl_set_port_no(rusturl_ptr url, const int32_t port);
int32_t rusturl_set_path(rusturl_ptr url, const char *path, size_t len);
int32_t rusturl_set_query(rusturl_ptr url, const char *path, size_t len);
int32_t rusturl_set_fragment(rusturl_ptr url, const char *path, size_t len);
/* Operations producing a new string in the container passed as last argument. */
int32_t rusturl_resolve(rusturl_ptr url, const char *relative, size_t len, void*);
int32_t rusturl_common_base_spec(rusturl_ptr url1, rusturl_ptr url2, void*);
int32_t rusturl_relative_spec(rusturl_ptr url1, rusturl_ptr url2, void*);
#ifdef __cplusplus
}
#endif
#endif // __RUST_URL_CAPI

View File

@ -1,57 +0,0 @@
extern crate libc;
use libc::size_t;
extern crate std;
use std::ptr;
use error_mapping::*;
// Callbacks that the embedding application must provide; `user` is the opaque
// string-container pointer handed to the `rusturl_*` functions.
extern "C" {
// Asks the container to hold `size` bytes.
fn c_fn_set_size(user: *mut libc::c_void, size: size_t) -> i32;
// Returns the container's writable byte buffer.
fn c_fn_get_buffer(user: *mut libc::c_void) -> *mut libc::c_char;
}
pub trait StringContainer {
fn set_size(&self, size_t) -> i32;
fn get_buffer(&self) -> *mut libc::c_char;
fn assign(&self, content: &str) -> i32;
}
/// `StringContainer` backed by the host-provided `c_fn_set_size` / `c_fn_get_buffer`
/// callbacks; `self` is the opaque container pointer passed through to them.
impl StringContainer for *mut libc::c_void {
    /// Resizes the container. Returns `InvalidArg` for a null container, else `OK`.
    fn set_size(&self, size: size_t) -> i32 {
        if (*self).is_null() {
            return NSError::InvalidArg.error_code();
        }
        // NOTE(review): the callback's return value is ignored, as before -
        // confirm whether hosts use it to report failures.
        unsafe {
            c_fn_set_size(*self, size);
        }
        NSError::OK.error_code()
    }

    /// Returns the container's buffer, or null for a null container.
    fn get_buffer(&self) -> *mut libc::c_char {
        if (*self).is_null() {
            return ptr::null_mut();
        }
        unsafe { c_fn_get_buffer(*self) }
    }

    /// Resizes the container to `content.len()` bytes and copies `content` into it.
    ///
    /// Returns `InvalidArg` for a null container and `Failure` when the host
    /// provides no buffer for non-empty content. No terminating NUL is written.
    fn assign(&self, content: &str) -> i32 {
        if (*self).is_null() {
            return NSError::InvalidArg.error_code();
        }
        let bytes = content.as_bytes();
        unsafe {
            c_fn_set_size(*self, bytes.len());
            let buf = c_fn_get_buffer(*self);
            if buf.is_null() {
                // A host may legitimately hand back no buffer for a zero-length
                // string; that is only an error when there is data to copy.
                return if bytes.is_empty() {
                    NSError::OK.error_code()
                } else {
                    NSError::Failure.error_code()
                };
            }
            ptr::copy(bytes.as_ptr(), buf as *mut u8, bytes.len());
        }
        NSError::OK.error_code()
    }
}

View File

@ -1,4 +0,0 @@
# Builds the Rust crate in the parent directory, links test.cpp against the
# resulting static library from ../target/debug, and runs the test binary.
all:
cd .. && cargo build
g++ -Wall -o test test.cpp ../target/debug/librust*.a -ldl -lpthread -lrt -lgcc_s -lpthread -lc -lm -std=c++0x
./test

View File

@ -1,141 +0,0 @@
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include "../src/rust-url-capi.h"
// Minimal growable byte buffer used as the string container handed to the
// rusturl_* functions. The content is not NUL-terminated.
class StringContainer
{
public:
    StringContainer()
        : mLength(0)
        , mBuffer(nullptr)
    {
    }

    ~StringContainer()
    {
        free(mBuffer);
        mBuffer = nullptr;
    }

    // Resizes the buffer to hold `size` bytes.
    void SetSize(size_t size)
    {
        // Always request at least one byte: realloc(ptr, 0) is
        // implementation-defined and may free the block and return NULL.
        char* resized = (char*)realloc(mBuffer, size ? size : 1);
        if (!resized) {
            // Allocation failed: the old block is still valid, keep it and its length.
            return;
        }
        mBuffer = resized;
        mLength = size;
    }

    char* GetBuffer()
    {
        return mBuffer;
    }

    // Compares the content with the NUL-terminated `ref`; prints a diagnostic
    // and exits the process with status -1 on mismatch.
    void CheckEquals(const char* ref)
    {
        size_t refLen = strlen(ref);
        printf("CheckEquals: %s (len:%zu)\n", ref, refLen);
        // Only touch the buffer when there is something to compare, so an
        // empty container with a null buffer is handled safely.
        bool same = refLen == mLength &&
                    (mLength == 0 || memcmp(mBuffer, ref, mLength) == 0);
        if (!same) {
            printf("\t--- ERROR ---\n");
            printf("Got : ");
            if (mBuffer && mLength) {
                fwrite(mBuffer, mLength, 1, stdout);
            }
            printf(" (len:%zu)\n", mLength);
            exit(-1);
        }
        printf("-> OK\n");
    }

private:
    size_t mLength;
    char* mBuffer;
};
extern "C" int32_t c_fn_set_size(void * container, size_t size)
{
((StringContainer *) container)->SetSize(size);
return 0;
}
extern "C" char * c_fn_get_buffer(void * container)
{
return ((StringContainer *) container)->GetBuffer();
}
// Evaluates `func` once, prints the expression with its return code, and asserts
// that the code equals `expected`. Use as a statement: TEST_CALL(call, 0);
// The last line deliberately has no trailing backslash so the macro ends here.
#define TEST_CALL(func, expected) \
do { \
int32_t code = (func); \
printf("%s -> code %d\n", #func, code); \
assert(code == (expected)); \
printf("-> OK\n"); \
} while (0)
// Smoke test of the C API: every call is checked through TEST_CALL / assert and
// every string result through StringContainer::CheckEquals. The steps build on
// each other (each setter changes the URL seen by the following getters).
int main() {
// Create URL
rusturl_ptr url = rusturl_new("http://example.com/path/some/file.txt",
strlen("http://example.com/path/some/file.txt"));
assert(url); // Check we have a URL
StringContainer container;
TEST_CALL(rusturl_get_spec(url, &container), 0);
container.CheckEquals("http://example.com/path/some/file.txt");
// Host
TEST_CALL(rusturl_set_host(url, "test.com", strlen("test.com")), 0);
TEST_CALL(rusturl_get_host(url, &container), 0);
container.CheckEquals("test.com");
// Path (dot segments are expected to be normalized away)
TEST_CALL(rusturl_get_path(url, &container), 0);
container.CheckEquals("/path/some/file.txt");
TEST_CALL(rusturl_set_path(url, "hello/../else.txt", strlen("hello/../else.txt")), 0);
TEST_CALL(rusturl_get_path(url, &container), 0);
container.CheckEquals("/else.txt");
// Resolving a relative reference against the URL
TEST_CALL(rusturl_resolve(url, "./bla/file.txt", strlen("./bla/file.txt"), &container), 0);
container.CheckEquals("http://test.com/bla/file.txt");
TEST_CALL(rusturl_get_scheme(url, &container), 0);
container.CheckEquals("http");
// Username and password
TEST_CALL(rusturl_set_username(url, "user", strlen("user")), 0);
TEST_CALL(rusturl_get_username(url, &container), 0);
container.CheckEquals("user");
TEST_CALL(rusturl_get_spec(url, &container), 0);
container.CheckEquals("http://user@test.com/else.txt");
TEST_CALL(rusturl_set_password(url, "pass", strlen("pass")), 0);
TEST_CALL(rusturl_get_password(url, &container), 0);
container.CheckEquals("pass");
TEST_CALL(rusturl_get_spec(url, &container), 0);
container.CheckEquals("http://user:pass@test.com/else.txt");
// Clearing the credentials again
TEST_CALL(rusturl_set_username(url, "", strlen("")), 0);
TEST_CALL(rusturl_set_password(url, "", strlen("")), 0);
TEST_CALL(rusturl_get_spec(url, &container), 0);
container.CheckEquals("http://test.com/else.txt");
// Host and port
TEST_CALL(rusturl_set_host_and_port(url, "example.org:1234", strlen("example.org:1234")), 0);
TEST_CALL(rusturl_get_host(url, &container), 0);
container.CheckEquals("example.org");
assert(rusturl_get_port(url) == 1234);
TEST_CALL(rusturl_set_port(url, "9090", strlen("9090")), 0);
assert(rusturl_get_port(url) == 9090);
// Query and fragment
TEST_CALL(rusturl_set_query(url, "x=1", strlen("x=1")), 0);
TEST_CALL(rusturl_get_query(url, &container), 0);
container.CheckEquals("x=1");
TEST_CALL(rusturl_set_fragment(url, "fragment", strlen("fragment")), 0);
TEST_CALL(rusturl_get_fragment(url, &container), 0);
container.CheckEquals("fragment");
TEST_CALL(rusturl_get_spec(url, &container), 0);
container.CheckEquals("http://example.org:9090/else.txt?x=1#fragment");
// Free the URL
rusturl_free(url);
// An empty fragment ("#") counts as present; setting it to "" removes it.
url = rusturl_new("http://example.com/#",
strlen("http://example.com/#"));
assert(url); // Check we have a URL
assert(rusturl_has_fragment(url) == 1);
TEST_CALL(rusturl_set_fragment(url, "", 0), 0);
assert(rusturl_has_fragment(url) == 0);
TEST_CALL(rusturl_get_spec(url, &container), 0);
container.CheckEquals("http://example.com/");
rusturl_free(url);
printf("SUCCESS\n");
return 0;
}

View File

@ -1 +0,0 @@
{"files":{".cargo-ok":"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855","Cargo.toml":"d0aad6852ec947597bab20cd85ad15e014044cb7717c9a91f4b8da0081b7134d","src/IdnaMappingTable.txt":"5e9f5929130b713e698162ac5b60a99ccfb831606686b1c50777cd920b55dee2","src/lib.rs":"b8e85707a40e8472d2e90849a1e0e24a7442f3c4614a57a60125f87d11e985a4","src/make_uts46_mapping_table.py":"36fa77c443672f15872d60438f96b7302eae28ec506b60a892579debc79b8e39","src/punycode.rs":"df883ec00b35cab38f96992667eef0767d8587746bfdab0613a03e4c49a26c16","src/uts46.rs":"c879570c511f210565ac0dbce8a212be6503e62cd7ed698830804bb7729e98de","src/uts46_mapping_table.rs":"daa59e4b6399a738f73967b222b7dce6c9706a471d306330d77380c89089fa24","tests/IdnaTest.txt":"12e7e150b04a7a2cb1f9b72222174844342218807126e9dbc53069505a5f6000","tests/punycode.rs":"2f4086411c00b0641377afe81071e51a695110a0cce474287557738c07f74322","tests/punycode_tests.json":"3d4ac0cf25984c37b9ce197f5df680a0136f728fb8ec82bc76624e42139eb3a8","tests/tests.rs":"bb92e129dc5e17e9a86ec6062dd7b3f4c905c4af69e773d7c70efea177654c7b","tests/uts46.rs":"be9f928c60b88a8e277ddfb1769f09a8cd273e2e120e8450fb9b34f4dc852b37"},"package":"1053236e00ce4f668aeca4a769a09b3bf5a682d802abd6f3cb39374f6b162c11"}

View File

View File

@ -1,24 +0,0 @@
[package]
name = "idna"
version = "0.1.0"
authors = ["Simon Sapin <simon.sapin@exyr.org>"]
description = "IDNA (Internationalizing Domain Names in Applications) and Punycode."
repository = "https://github.com/servo/rust-url/"
license = "MIT/Apache-2.0"
[lib]
doctest = false
test = false
[[test]]
name = "tests"
harness = false
[dev-dependencies]
rustc-test = "0.1"
rustc-serialize = "0.3"
[dependencies]
unicode-bidi = "0.2.3"
unicode-normalization = "0.1.2"
matches = "0.1"

File diff suppressed because it is too large Load Diff

View File

@ -1,73 +0,0 @@
// Copyright 2016 Simon Sapin.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//! This Rust crate implements IDNA
//! [per the WHATWG URL Standard](https://url.spec.whatwg.org/#idna).
//!
//! It also exposes the underlying algorithms from [*Unicode IDNA Compatibility Processing*
//! (Unicode Technical Standard #46)](http://www.unicode.org/reports/tr46/)
//! and [Punycode (RFC 3492)](https://tools.ietf.org/html/rfc3492).
//!
//! Quoting from [UTS #46's introduction](http://www.unicode.org/reports/tr46/#Introduction):
//!
//! > Initially, domain names were restricted to ASCII characters.
//! > A system was introduced in 2003 for internationalized domain names (IDN).
//! > This system is called Internationalizing Domain Names for Applications,
//! > or IDNA2003 for short.
//! > This mechanism supports IDNs by means of a client software transformation
//! > into a format known as Punycode.
//! > A revision of IDNA was approved in 2010 (IDNA2008).
//! > This revision has a number of incompatibilities with IDNA2003.
//! >
//! > The incompatibilities force implementers of client software,
//! > such as browsers and emailers,
//! > to face difficult choices during the transition period
//! > as registries shift from IDNA2003 to IDNA2008.
//! > This document specifies a mechanism
//! > that minimizes the impact of this transition for client software,
//! > allowing client software to access domains that are valid under either system.
#[macro_use] extern crate matches;
extern crate unicode_bidi;
extern crate unicode_normalization;
pub mod punycode;
pub mod uts46;
/// The [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii) algorithm.
///
/// Returns the ASCII representation of a domain name,
/// normalizing characters (upper-case to lower-case and other kinds of equivalence)
/// and using Punycode as necessary.
///
/// This process may fail, in which case the UTS #46 errors are returned.
pub fn domain_to_ascii(domain: &str) -> Result<String, uts46::Errors> {
    let flags = uts46::Flags {
        use_std3_ascii_rules: false,
        transitional_processing: true, // XXX: switch when Firefox does
        verify_dns_length: false,
    };
    uts46::to_ascii(domain, flags)
}
/// The [domain to Unicode](https://url.spec.whatwg.org/#concept-domain-to-unicode) algorithm.
///
/// Returns the Unicode representation of a domain name,
/// normalizing characters (upper-case to lower-case and other kinds of equivalence)
/// and decoding Punycode as necessary.
///
/// The second element of the result may report
/// [syntax violations](https://url.spec.whatwg.org/#syntax-violation),
/// but a string for the mapped domain is always returned.
pub fn domain_to_unicode(domain: &str) -> (String, Result<(), uts46::Errors>) {
    let flags = uts46::Flags {
        use_std3_ascii_rules: false,
        // Unused:
        transitional_processing: true,
        verify_dns_length: false,
    };
    uts46::to_unicode(domain, flags)
}

View File

@ -1,56 +0,0 @@
# Copyright 2013-2014 Valentin Gosu.
#
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
# option. This file may not be copied, modified, or distributed
# except according to those terms.
# Run as: python make_uts46_mapping_table.py IdnaMappingTable.txt > uts46_mapping_table.rs
# You can get the latest idna table from
# http://www.unicode.org/Public/idna/latest/IdnaMappingTable.txt
# Emit the license header and the opening of the generated Rust `TABLE` array.
print('''\
// Copyright 2013-2014 Valentin Gosu.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// Generated by make_idna_table.py
static TABLE: &'static [Range] = &[
''')
# The input is always read from this fixed name in the current directory;
# no command-line argument is consulted here.
txt = open("IdnaMappingTable.txt")
def char(s):
    """Return the character for hex code point `s`, escaped for a Rust literal.

    Backslash, double quote and NUL are backslash-escaped; every other
    character is emitted as-is. On Python 2 the result is a UTF-8 encoded
    byte string (as before); on Python 3, where `unichr` does not exist, it
    is a text string that `print` encodes using the output encoding.
    """
    code_point = int(s, 16)
    try:
        text = unichr(code_point).encode('utf8')
    except NameError:
        # Python 3: `chr` covers the full Unicode range.
        text = chr(code_point)
    return (text
            .replace('\\', '\\\\')
            .replace('"', '\\"')
            .replace('\0', '\\0'))
# Convert every data line of the mapping table into one `Range { ... }` entry.
# Each line has ';'-separated fields: code point or range, status, and an
# optional mapping given as space-separated hex code points.
for line in txt:
# remove comments
line, _, _ = line.partition('#')
# skip empty lines
if len(line.strip()) == 0:
continue
fields = line.split(';')
if fields[0].strip() == 'D800..DFFF':
continue # Surrogates don't occur in Rust strings.
# A single code point is treated as a range whose start and end coincide.
first, _, last = fields[0].strip().partition('..')
if not last:
last = first
# Turn the status name into CamelCase, e.g. "disallowed_STD3_valid" -> "DisallowedStd3Valid".
mapping = fields[1].strip().replace('_', ' ').title().replace(' ', '')
if len(fields) > 2:
if fields[2].strip():
mapping += '("%s")' % ''.join(char(c) for c in fields[2].strip().split(' '))
elif mapping == "Deviation":
mapping += '("")'
print(" Range { from: '%s', to: '%s', mapping: %s }," % (char(first), char(last), mapping))
# Close the generated array.
print("];")

View File

@ -1,213 +0,0 @@
// Copyright 2013 Simon Sapin.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//! Punycode ([RFC 3492](http://tools.ietf.org/html/rfc3492)) implementation.
//!
//! Since Punycode fundamentally works on unicode code points,
//! `encode` and `decode` take and return slices and vectors of `char`.
//! `encode_str` and `decode_to_string` provide convenience wrappers
//! that convert from and to Rust's UTF-8 based `str` and `String` types.
use std::u32;
use std::char;
use std::ascii::AsciiExt;
// Bootstring parameters for Punycode
// (RFC 3492 names them base, tmin, tmax, skew, damp, initial_bias and initial_n).
static BASE: u32 = 36;
static T_MIN: u32 = 1;
static T_MAX: u32 = 26;
static SKEW: u32 = 38;
static DAMP: u32 = 700;
static INITIAL_BIAS: u32 = 72;
// First non-ASCII code point; decoding and encoding both start counting from here.
static INITIAL_N: u32 = 0x80;
// Separates the literal ASCII part of a label from the encoded part.
static DELIMITER: char = '-';
/// Bias adaptation step of Punycode: computes the next bias from the last
/// `delta`, the number of code points handled so far (`num_points`), and
/// whether this is the first adaptation (`first_time`).
#[inline]
fn adapt(mut delta: u32, num_points: u32, first_time: bool) -> u32 {
    // The very first delta is damped much more strongly than later ones.
    let divisor = if first_time { DAMP } else { 2 };
    delta = delta / divisor;
    delta = delta + delta / num_points;
    let threshold = ((BASE - T_MIN) * T_MAX) / 2;
    let mut k = 0;
    while delta > threshold {
        delta = delta / (BASE - T_MIN);
        k = k + BASE;
    }
    k + ((BASE - T_MIN + 1) * delta) / (delta + SKEW)
}
/// Convert Punycode to a Unicode `String`.
///
/// This is a convenience wrapper around `decode`; it returns `None` whenever
/// `decode` does.
#[inline]
pub fn decode_to_string(input: &str) -> Option<String> {
    match decode(input) {
        Some(code_points) => Some(code_points.into_iter().collect()),
        None => None,
    }
}
/// Convert Punycode to Unicode.
///
/// Return None on malformed input or overflow.
/// Overflow can only happen on inputs that take more than
/// 63 encoded bytes, the DNS limit on domain name labels.
///
/// Malformed input covers bytes that are not ASCII letters or digits in the
/// encoded part, input that ends in the middle of a variable-length integer,
/// and values that do not correspond to a valid `char`.
pub fn decode(input: &str) -> Option<Vec<char>> {
// Handle "basic" (ASCII) code points.
// They are encoded as-is before the last delimiter, if any.
// When the only delimiter is at position 0 the whole input (delimiter included)
// is kept as the encoded part, so the digit decoding below rejects it.
let (mut output, input) = match input.rfind(DELIMITER) {
None => (Vec::new(), input),
Some(position) => (
input[..position].chars().collect(),
if position > 0 { &input[position + 1..] } else { input }
)
};
let mut code_point = INITIAL_N;
let mut bias = INITIAL_BIAS;
// `i` is the combined state: after each delta it encodes both how far
// `code_point` advances and the position at which to insert into `output`.
let mut i = 0;
let mut iter = input.bytes();
loop {
let previous_i = i;
let mut weight = 1;
let mut k = BASE;
let mut byte = match iter.next() {
None => break,
Some(byte) => byte,
};
// Decode a generalized variable-length integer into delta,
// which gets added to i.
loop {
// Letters (either case) are digits 0..=25, '0'..='9' are digits 26..=35.
let digit = match byte {
byte @ b'0' ... b'9' => byte - b'0' + 26,
byte @ b'A' ... b'Z' => byte - b'A',
byte @ b'a' ... b'z' => byte - b'a',
_ => return None
} as u32;
if digit > (u32::MAX - i) / weight {
return None // Overflow
}
i += digit * weight;
// Threshold for this digit position, clamped to T_MIN..=T_MAX.
let t = if k <= bias { T_MIN }
else if k >= bias + T_MAX { T_MAX }
else { k - bias };
// A digit below the threshold terminates the integer.
if digit < t {
break
}
if weight > u32::MAX / (BASE - t) {
return None // Overflow
}
weight *= BASE - t;
k += BASE;
byte = match iter.next() {
None => return None, // End of input before the end of this delta
Some(byte) => byte,
};
}
let length = output.len() as u32;
bias = adapt(i - previous_i, length + 1, previous_i == 0);
if i / (length + 1) > u32::MAX - code_point {
return None // Overflow
}
// i was supposed to wrap around from length+1 to 0,
// incrementing code_point each time.
code_point += i / (length + 1);
i %= length + 1;
// Rejects values that are not valid `char`s (e.g. surrogates or out of range).
let c = match char::from_u32(code_point) {
Some(c) => c,
None => return None
};
output.insert(i as usize, c);
i += 1;
}
Some(output)
}
/// Convert a Unicode `str` to Punycode.
///
/// Convenience wrapper around `encode`: the string is split into its code
/// points, which are then handed to `encode` as a slice.
#[inline]
pub fn encode_str(input: &str) -> Option<String> {
    let code_points: Vec<char> = input.chars().collect();
    encode(&code_points)
}
/// Convert Unicode to Punycode.
///
/// The ASCII code points of `input` are emitted first, unchanged and in order,
/// followed by a delimiter if there was at least one. Every other code point
/// is then encoded, in increasing code point order, as a generalized
/// variable-length integer.
///
/// Return None on overflow, which can only happen on inputs that would take more than
/// 63 encoded bytes, the DNS limit on domain name labels.
pub fn encode(input: &[char]) -> Option<String> {
    // Handle "basic" (ASCII) code points. They are encoded as-is.
    let mut output: String = input.iter().filter(|c| c.is_ascii()).cloned().collect();
    let basic_length = output.len() as u32;
    if basic_length > 0 {
        output.push_str("-")
    }
    let mut code_point = INITIAL_N;
    let mut delta = 0;
    let mut bias = INITIAL_BIAS;
    let mut processed = basic_length;
    let input_length = input.len() as u32;
    while processed < input_length {
        // All code points < code_point have been handled already.
        // Find the next larger one.
        // `processed < input_length` guarantees at least one is left, so `min()` is `Some`.
        let min_code_point = input.iter().map(|&c| c as u32)
                                  .filter(|&c| c >= code_point).min().unwrap();
        if min_code_point - code_point > (u32::MAX - delta) / (processed + 1) {
            return None; // Overflow
        }
        // Increase delta to advance the decoder's <code_point,i> state to <min_code_point,0>
        delta += (min_code_point - code_point) * (processed + 1);
        code_point = min_code_point;
        for &c in input {
            let c = c as u32;
            if c < code_point {
                delta += 1;
                if delta == 0 {
                    return None; // Overflow
                }
            }
            if c == code_point {
                // Represent delta as a generalized variable-length integer:
                let mut q = delta;
                let mut k = BASE;
                loop {
                    let t = if k <= bias {
                        T_MIN
                    } else if k >= bias + T_MAX {
                        T_MAX
                    } else {
                        k - bias
                    };
                    if q < t {
                        break;
                    }
                    let value = t + ((q - t) % (BASE - t));
                    value_to_digit(value, &mut output);
                    q = (q - t) / (BASE - t);
                    k += BASE;
                }
                value_to_digit(q, &mut output);
                bias = adapt(delta, processed + 1, processed == basic_length);
                delta = 0;
                processed += 1;
            }
        }
        delta += 1;
        code_point += 1;
    }
    Some(output)
}
/// Append the Punycode digit for `value` to `output`.
///
/// Values 0 to 25 map to `a`..`z` and values 26 to 35 map to `0`..`9`.
///
/// # Panics
///
/// Panics if `value` is greater than 35, which is not a valid digit value.
#[inline]
fn value_to_digit(value: u32, output: &mut String) {
    let digit = if value <= 25 {
        b'a' + value as u8 // a..z
    } else if value <= 35 {
        b'0' + (value - 26) as u8 // 0..9
    } else {
        panic!("Punycode digit value out of range: {}", value)
    };
    // The digit is always ASCII, so it can be pushed as a `char` without `unsafe`.
    output.push(digit as char);
}

View File

@ -1,322 +0,0 @@
// Copyright 2013-2014 Valentin Gosu.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//! [*Unicode IDNA Compatibility Processing*
//! (Unicode Technical Standard #46)](http://www.unicode.org/reports/tr46/)
use self::Mapping::*;
use punycode;
use std::ascii::AsciiExt;
use unicode_normalization::UnicodeNormalization;
use unicode_normalization::char::is_combining_mark;
use unicode_bidi::{BidiClass, bidi_class};
include!("uts46_mapping_table.rs");
/// How `map_char` treats a code point, as recorded in the mapping table.
#[derive(Debug)]
enum Mapping {
/// The code point is copied to the output unchanged.
Valid,
/// The code point is dropped from the output.
Ignored,
/// The code point is replaced by the given string.
Mapped(&'static str),
/// Replaced by the given string under transitional processing,
/// copied unchanged otherwise.
Deviation(&'static str),
/// An error is recorded; the code point is still copied to the output.
Disallowed,
/// Copied unchanged; an error is recorded when `use_std3_ascii_rules` is set.
DisallowedStd3Valid,
/// Replaced by the given string; an error is recorded when
/// `use_std3_ascii_rules` is set.
DisallowedStd3Mapped(&'static str),
}
/// One entry of the mapping table: a run of code points sharing a `Mapping`.
struct Range {
// First code point of the run (inclusive: `find_char` only rejects `codepoint < from`).
from: char,
// Last code point of the run (inclusive: `find_char` only rejects `codepoint > to`).
to: char,
// Treatment applied to every code point in `from..=to`.
mapping: Mapping,
}
/// Look up the mapping table entry whose range contains `codepoint`.
///
/// `TABLE` is binary-searched, so its ranges must be sorted by code point and
/// must not overlap.
///
/// # Panics
///
/// Panics if no range of `TABLE` contains `codepoint`.
fn find_char(codepoint: char) -> &'static Mapping {
    // Half-open search window: the match, if any, is in TABLE[low..high].
    let mut low = 0;
    let mut high = TABLE.len();
    while low < high {
        let mid = low + (high - low) / 2;
        let range = &TABLE[mid];
        if codepoint > range.to {
            // Exclude `mid` itself, so the window shrinks on every iteration.
            low = mid + 1;
        } else if codepoint < range.from {
            high = mid;
        } else {
            return &range.mapping;
        }
    }
    panic!("no mapping table entry covers U+{:04X}", codepoint as u32)
}
/// Apply the mapping table to one code point.
///
/// The (possibly replaced) code point is appended to `output`, and any
/// violation is appended to `errors`. A disallowed code point is still copied
/// to `output`, so processing can continue after the error is recorded.
fn map_char(codepoint: char, flags: Flags, output: &mut String, errors: &mut Vec<Error>) {
    let std3 = flags.use_std3_ascii_rules;
    match *find_char(codepoint) {
        Mapping::Ignored => {}
        Mapping::Valid => output.push(codepoint),
        Mapping::Mapped(replacement) => output.push_str(replacement),
        Mapping::Deviation(replacement) => {
            // Deviation characters are only replaced in transitional mode.
            if flags.transitional_processing {
                output.push_str(replacement)
            } else {
                output.push(codepoint)
            }
        }
        Mapping::Disallowed => {
            errors.push(Error::DissallowedCharacter);
            output.push(codepoint);
        }
        Mapping::DisallowedStd3Valid => {
            if std3 {
                errors.push(Error::DissallowedByStd3AsciiRules);
            }
            output.push(codepoint)
        }
        Mapping::DisallowedStd3Mapped(replacement) => {
            if std3 {
                errors.push(Error::DissallowedMappedInStd3);
            }
            output.push_str(replacement)
        }
    }
}
// http://tools.ietf.org/html/rfc5893#section-2
//
// Check `label` against the Bidi rules, returning `true` when it passes.
//
// The class of the first character selects the branch:
// - LTR (L, EN, or ON/ES under transitional processing): every following
//   character must belong to the Rule 5 class set. The Rule 6 end-of-label
//   check is currently disabled (see the TODO below).
// - RTL (R or AL): Rules 2, 3 and 4 are all enforced.
// - anything else: the label is rejected.
// An empty label passes.
fn passes_bidi(label: &str, transitional_processing: bool) -> bool {
let mut chars = label.chars();
// Class of the first character; `chars` is left positioned on the second one.
let class = match chars.next() {
Some(c) => bidi_class(c),
None => return true, // empty string
};
if class == BidiClass::L
|| (class == BidiClass::ON && transitional_processing) // starts with \u200D
|| (class == BidiClass::ES && transitional_processing) // hack: 1.35.+33.49
|| class == BidiClass::EN // hack: starts with number 0à.\u05D0
{ // LTR
// Rule 5: only these classes may follow the first character
loop {
match chars.next() {
Some(c) => {
let c = bidi_class(c);
if !matches!(c, BidiClass::L | BidiClass::EN |
BidiClass::ES | BidiClass::CS |
BidiClass::ET | BidiClass::ON |
BidiClass::BN | BidiClass::NSM) {
return false;
}
},
None => { break; },
}
}
// Rule 6
let mut rev_chars = label.chars().rev();
let mut last = rev_chars.next();
// Skip trailing NSM: the label must end in L or EN followed by 0 or more NSM.
// NOTE: `last` is not used afterwards because the check below is commented out.
loop {
match last {
Some(c) if bidi_class(c) == BidiClass::NSM => {
last = rev_chars.next();
continue;
}
_ => { break; },
}
}
// TODO: does not pass for àˇ.\u05D0
// match last {
// Some(c) if bidi_class(c) == BidiClass::L
// || bidi_class(c) == BidiClass::EN => {},
// Some(c) => { return false; },
// _ => {}
// }
} else if class == BidiClass::R || class == BidiClass::AL { // RTL
// Rule 4 needs to know whether both EN and AN occur after the first character.
let mut found_en = false;
let mut found_an = false;
// Rule 2: only these classes may follow the first character
loop {
match chars.next() {
Some(c) => {
let char_class = bidi_class(c);
if char_class == BidiClass::EN {
found_en = true;
}
if char_class == BidiClass::AN {
found_an = true;
}
if !matches!(char_class, BidiClass::R | BidiClass::AL |
BidiClass::AN | BidiClass::EN |
BidiClass::ES | BidiClass::CS |
BidiClass::ET | BidiClass::ON |
BidiClass::BN | BidiClass::NSM) {
return false;
}
},
None => { break; },
}
}
// Rule 3
let mut rev_chars = label.chars().rev();
let mut last = rev_chars.next();
// Skip trailing NSM: the label must end in R, AL, EN or AN followed by 0 or more NSM.
loop {
match last {
Some(c) if bidi_class(c) == BidiClass::NSM => {
last = rev_chars.next();
continue;
}
_ => { break; },
}
}
// `last` is now the last non-NSM character, or None if there is none.
match last {
Some(c) if matches!(bidi_class(c), BidiClass::R | BidiClass::AL |
BidiClass::EN | BidiClass::AN) => {},
_ => { return false; }
}
// Rule 4: EN and AN must not both be present
if found_an && found_en {
return false;
}
} else {
// Rule 2: Should start with L or R/AL
return false;
}
return true;
}
/// http://www.unicode.org/reports/tr46/#Validity_Criteria
///
/// Check one label and record `Error::ValidityCriteria` in `errors` for a
/// label that is not in NFC, and once more if any of the remaining criteria
/// fails (hyphen placement, leading combining mark, character status, Bidi).
fn validate(label: &str, flags: Flags, errors: &mut Vec<Error>) {
    if !label.nfc().eq(label.chars()) {
        errors.push(Error::ValidityCriteria);
    }

    // Can not contain '.' since the input is from .split('.')
    let hyphens_in_third_and_fourth = || {
        let mut from_third = label.chars().skip(2);
        from_third.next() == Some('-') && from_third.next() == Some('-')
    };
    let has_unacceptable_char = || {
        label.chars().any(|c| match *find_char(c) {
            Mapping::Valid => false,
            Mapping::Deviation(_) => flags.transitional_processing,
            Mapping::DisallowedStd3Valid => flags.use_std3_ascii_rules,
            _ => true,
        })
    };

    // The checks are evaluated lazily, in this order, and stop at the first failure.
    let invalid = hyphens_in_third_and_fourth()
        || label.starts_with('-')
        || label.ends_with('-')
        || label.chars().next().map_or(false, is_combining_mark)
        || has_unacceptable_char()
        || !passes_bidi(label, flags.transitional_processing);
    if invalid {
        errors.push(Error::ValidityCriteria)
    }
}
/// http://www.unicode.org/reports/tr46/#Processing
///
/// Map every code point of `domain`, normalize the result to NFC, then
/// validate it label by label. Labels starting with `xn--` are Punycode-decoded
/// first and validated without transitional processing. Every problem found is
/// appended to `errors`; the processed domain is returned either way.
///
/// Labels are always re-joined with `.`, so empty labels (as in `.com` or
/// `a..b`) keep their separators. A label whose Punycode decoding fails
/// contributes no text, only an `Error::PunycodeError`.
fn processing(domain: &str, flags: Flags, errors: &mut Vec<Error>) -> String {
    let mut mapped = String::new();
    for c in domain.chars() {
        map_char(c, flags, &mut mapped, errors)
    }
    let normalized: String = mapped.nfc().collect();
    let mut validated = String::new();
    for (index, label) in normalized.split('.').enumerate() {
        // Separate on the label index rather than on `validated` being
        // non-empty, otherwise leading empty labels lose their dot.
        if index > 0 {
            validated.push('.');
        }
        if label.starts_with("xn--") {
            match punycode::decode_to_string(&label["xn--".len()..]) {
                Some(decoded_label) => {
                    let flags = Flags { transitional_processing: false, ..flags };
                    validate(&decoded_label, flags, errors);
                    validated.push_str(&decoded_label)
                }
                None => errors.push(Error::PunycodeError)
            }
        } else {
            validate(label, flags, errors);
            validated.push_str(label)
        }
    }
    validated
}
/// Options controlling UTS #46 processing.
#[derive(Copy, Clone)]
pub struct Flags {
/// When set, code points that are only disallowed under the STD3 ASCII rules
/// are recorded as errors during mapping and rejected during validation.
pub use_std3_ascii_rules: bool,
/// When set, deviation characters are replaced by their mapping instead of
/// being kept. `to_unicode` and the handling of decoded `xn--` labels always
/// clear this flag.
pub transitional_processing: bool,
/// When set, `to_ascii` also checks the length of the result and of each of
/// its labels against the DNS limits.
pub verify_dns_length: bool,
}
/// The individual problems that processing can record.
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
enum Error {
/// A label could not be Punycode-decoded or Punycode-encoded.
PunycodeError,
/// A label failed one of the checks in `validate`.
ValidityCriteria,
/// A `DisallowedStd3Valid` code point was seen while `use_std3_ascii_rules` was set.
DissallowedByStd3AsciiRules,
/// A `DisallowedStd3Mapped` code point was seen while `use_std3_ascii_rules` was set.
DissallowedMappedInStd3,
/// A `Disallowed` code point was seen.
DissallowedCharacter,
/// The DNS length check in `to_ascii` failed. Despite the name this is also
/// recorded for an empty domain or an empty label.
TooLongForDns,
}
/// Errors recorded during UTS #46 processing.
///
/// This is opaque for now, only indicating the presence of at least one error.
/// More details may be exposed in the future.
#[derive(Debug)]
pub struct Errors(Vec<Error>);
/// http://www.unicode.org/reports/tr46/#ToASCII
///
/// Process `domain`, then Punycode-encode every label that is not pure ASCII
/// and prefix it with `xn--`. Labels are always re-joined with `.`, so empty
/// labels keep their separators.
///
/// When `flags.verify_dns_length` is set, the result (ignoring one trailing
/// dot) must be 1 to 253 bytes long and each of its labels 1 to 63 bytes long.
///
/// Returns the ASCII form, or all recorded errors if there was at least one.
pub fn to_ascii(domain: &str, flags: Flags) -> Result<String, Errors> {
    let mut errors = Vec::new();
    let mut result = String::new();
    for (index, label) in processing(domain, flags, &mut errors).split('.').enumerate() {
        // Separate on the label index rather than on `result` being non-empty,
        // otherwise leading empty labels lose their dot and evade the length check.
        if index > 0 {
            result.push('.');
        }
        if label.is_ascii() {
            result.push_str(label);
        } else {
            match punycode::encode_str(label) {
                Some(x) => {
                    result.push_str("xn--");
                    result.push_str(&x);
                },
                None => errors.push(Error::PunycodeError)
            }
        }
    }

    if flags.verify_dns_length {
        // A single trailing dot (root label) does not count towards the limits.
        let domain = if result.ends_with(".") { &result[..result.len()-1] } else { &*result };
        if domain.len() < 1 || domain.len() > 253 ||
                domain.split('.').any(|label| label.len() < 1 || label.len() > 63) {
            errors.push(Error::TooLongForDns)
        }
    }
    if errors.is_empty() {
        Ok(result)
    } else {
        Err(Errors(errors))
    }
}
/// http://www.unicode.org/reports/tr46/#ToUnicode
///
/// Only `use_std3_ascii_rules` is used in `flags`: transitional processing is
/// always switched off, and `verify_dns_length` is not consulted.
///
/// The processed domain is returned even when errors were recorded; the second
/// element of the tuple tells whether there were any.
pub fn to_unicode(domain: &str, mut flags: Flags) -> (String, Result<(), Errors>) {
    let mut recorded = Vec::new();
    flags.transitional_processing = false;
    let unicode = processing(domain, flags, &mut recorded);
    let outcome = match recorded.len() {
        0 => Ok(()),
        _ => Err(Errors(recorded)),
    };
    (unicode, outcome)
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,65 +0,0 @@
// Copyright 2013 Simon Sapin.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use idna::punycode::{decode, encode_str};
use rustc_serialize::json::{Json, Object};
use test::TestFn;
/// Check one Punycode test vector in both directions.
///
/// Panics if `encoded` fails to decode or does not decode to `decoded`, and
/// likewise if `decoded` fails to encode or does not encode to `encoded`.
fn one_test(decoded: &str, encoded: &str) {
    match decode(encoded) {
        None => panic!("Decoding {} failed.", encoded),
        Some(result) => {
            let result = result.into_iter().collect::<String>();
            // Format arguments are passed to `assert!` directly: a pre-built
            // `format!` string is not accepted as a panic message in newer editions.
            assert!(result == decoded,
                    "Incorrect decoding of \"{}\":\n \"{}\"\n!= \"{}\"\n",
                    encoded, result, decoded)
        }
    }
    match encode_str(decoded) {
        None => panic!("Encoding {} failed.", decoded),
        Some(result) => {
            assert!(result == encoded,
                    "Incorrect encoding of \"{}\":\n \"{}\"\n!= \"{}\"\n",
                    decoded, result, encoded)
        }
    }
}
/// Fetch the string stored under `key` in a JSON object.
///
/// A missing key yields the empty string; a value that is not a JSON string
/// is a malformed fixture and panics.
fn get_string<'a>(map: &'a Object, key: &str) -> &'a str {
    let owned_key = key.to_string();
    match map.get(&owned_key) {
        Some(&Json::String(ref text)) => text,
        None => "",
        Some(_) => panic!(),
    }
}
/// Register one dynamic test per entry of `punycode_tests.json`.
///
/// Each entry must be a JSON object; its "decoded" and "encoded" strings are
/// checked with `one_test`. Tests are named after their 1-based position,
/// followed by the entry's "description" when it has one. Any other JSON shape
/// panics.
pub fn collect_tests<F: FnMut(String, TestFn)>(add_test: &mut F) {
    let entries = match Json::from_str(include_str!("punycode_tests.json")) {
        Ok(Json::Array(entries)) => entries,
        other => panic!("{:?}", other),
    };
    for (index, entry) in entries.into_iter().enumerate() {
        let object = match entry {
            Json::Object(object) => object,
            _ => panic!(),
        };
        let number = index + 1;
        // The borrow of `object` ends with this statement, so it can be moved
        // into the test closure below.
        let test_name = match get_string(&object, "description") {
            "" => format!("Punycode {}", number),
            desc => format!("Punycode {}: {}", number, desc),
        };
        add_test(test_name, TestFn::dyn_test_fn(move || one_test(
            get_string(&object, "decoded"),
            get_string(&object, "encoded"),
        )));
    }
}

View File

@ -1,120 +0,0 @@
[
{
"description": "These tests are copied from https://github.com/bestiejs/punycode.js/blob/master/tests/tests.js , used under the MIT license.",
"decoded": "",
"encoded": ""
},
{
"description": "a single basic code point",
"decoded": "Bach",
"encoded": "Bach-"
},
{
"description": "a single non-ASCII character",
"decoded": "\u00FC",
"encoded": "tda"
},
{
"description": "multiple non-ASCII characters",
"decoded": "\u00FC\u00EB\u00E4\u00F6\u2665",
"encoded": "4can8av2009b"
},
{
"description": "mix of ASCII and non-ASCII characters",
"decoded": "b\u00FCcher",
"encoded": "bcher-kva"
},
{
"description": "long string with both ASCII and non-ASCII characters",
"decoded": "Willst du die Bl\u00FCthe des fr\u00FChen, die Fr\u00FCchte des sp\u00E4teren Jahres",
"encoded": "Willst du die Blthe des frhen, die Frchte des spteren Jahres-x9e96lkal"
},
{
"description": "Arabic (Egyptian)",
"decoded": "\u0644\u064A\u0647\u0645\u0627\u0628\u062A\u0643\u0644\u0645\u0648\u0634\u0639\u0631\u0628\u064A\u061F",
"encoded": "egbpdaj6bu4bxfgehfvwxn"
},
{
"description": "Chinese (simplified)",
"decoded": "\u4ED6\u4EEC\u4E3A\u4EC0\u4E48\u4E0D\u8BF4\u4E2d\u6587",
"encoded": "ihqwcrb4cv8a8dqg056pqjye"
},
{
"description": "Chinese (traditional)",
"decoded": "\u4ED6\u5011\u7232\u4EC0\u9EBD\u4E0D\u8AAA\u4E2D\u6587",
"encoded": "ihqwctvzc91f659drss3x8bo0yb"
},
{
"description": "Czech",
"decoded": "Pro\u010Dprost\u011Bnemluv\u00ED\u010Desky",
"encoded": "Proprostnemluvesky-uyb24dma41a"
},
{
"description": "Hebrew",
"decoded": "\u05DC\u05DE\u05D4\u05D4\u05DD\u05E4\u05E9\u05D5\u05D8\u05DC\u05D0\u05DE\u05D3\u05D1\u05E8\u05D9\u05DD\u05E2\u05D1\u05E8\u05D9\u05EA",
"encoded": "4dbcagdahymbxekheh6e0a7fei0b"
},
{
"description": "Hindi (Devanagari)",
"decoded": "\u092F\u0939\u0932\u094B\u0917\u0939\u093F\u0928\u094D\u0926\u0940\u0915\u094D\u092F\u094B\u0902\u0928\u0939\u0940\u0902\u092C\u094B\u0932\u0938\u0915\u0924\u0947\u0939\u0948\u0902",
"encoded": "i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd"
},
{
"description": "Japanese (kanji and hiragana)",
"decoded": "\u306A\u305C\u307F\u3093\u306A\u65E5\u672C\u8A9E\u3092\u8A71\u3057\u3066\u304F\u308C\u306A\u3044\u306E\u304B",
"encoded": "n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa"
},
{
"description": "Korean (Hangul syllables)",
"decoded": "\uC138\uACC4\uC758\uBAA8\uB4E0\uC0AC\uB78C\uB4E4\uC774\uD55C\uAD6D\uC5B4\uB97C\uC774\uD574\uD55C\uB2E4\uBA74\uC5BC\uB9C8\uB098\uC88B\uC744\uAE4C",
"encoded": "989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c"
},
{
"description": "Russian (Cyrillic)",
"decoded": "\u043F\u043E\u0447\u0435\u043C\u0443\u0436\u0435\u043E\u043D\u0438\u043D\u0435\u0433\u043E\u0432\u043E\u0440\u044F\u0442\u043F\u043E\u0440\u0443\u0441\u0441\u043A\u0438",
"encoded": "b1abfaaepdrnnbgefbadotcwatmq2g4l"
},
{
"description": "Spanish",
"decoded": "Porqu\u00E9nopuedensimplementehablarenEspa\u00F1ol",
"encoded": "PorqunopuedensimplementehablarenEspaol-fmd56a"
},
{
"description": "Vietnamese",
"decoded": "T\u1EA1isaoh\u1ECDkh\u00F4ngth\u1EC3ch\u1EC9n\u00F3iti\u1EBFngVi\u1EC7t",
"encoded": "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g"
},
{
"decoded": "3\u5E74B\u7D44\u91D1\u516B\u5148\u751F",
"encoded": "3B-ww4c5e180e575a65lsy2b"
},
{
"decoded": "\u5B89\u5BA4\u5948\u7F8E\u6075-with-SUPER-MONKEYS",
"encoded": "-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n"
},
{
"decoded": "Hello-Another-Way-\u305D\u308C\u305E\u308C\u306E\u5834\u6240",
"encoded": "Hello-Another-Way--fc4qua05auwb3674vfr0b"
},
{
"decoded": "\u3072\u3068\u3064\u5C4B\u6839\u306E\u4E0B2",
"encoded": "2-u9tlzr9756bt3uc0v"
},
{
"decoded": "Maji\u3067Koi\u3059\u308B5\u79D2\u524D",
"encoded": "MajiKoi5-783gue6qz075azm5e"
},
{
"decoded": "\u30D1\u30D5\u30A3\u30FCde\u30EB\u30F3\u30D0",
"encoded": "de-jg4avhby1noc0d"
},
{
"decoded": "\u305D\u306E\u30B9\u30D4\u30FC\u30C9\u3067",
"encoded": "d9juau41awczczp"
},
{
"description": "ASCII string that breaks the existing rules for host-name labels (It's not a realistic example for IDNA, because IDNA never encodes pure ASCII labels.)",
"decoded": "-> $1.00 <-",
"encoded": "-> $1.00 <--"
}
]

View File

@ -1,25 +0,0 @@
extern crate idna;
extern crate rustc_serialize;
extern crate test;
mod punycode;
mod uts46;
/// Collect the Punycode and UTS #46 tests and run them through the `test`
/// crate's harness, forwarding the command-line arguments to it.
fn main() {
    let mut registered = Vec::new();
    {
        // The closure borrows `registered` mutably; the inner scope ends that
        // borrow before the vector is handed to the harness.
        let mut register = |name, testfn| {
            let desc = test::TestDesc {
                name: test::DynTestName(name),
                ignore: false,
                should_panic: test::ShouldPanic::No,
            };
            registered.push(test::TestDescAndFn {
                desc: desc,
                testfn: testfn,
            });
        };
        punycode::collect_tests(&mut register);
        uts46::collect_tests(&mut register);
    }
    let args = std::env::args().collect::<Vec<_>>();
    test::test_main(&args, registered)
}

View File

@ -1,117 +0,0 @@
// Copyright 2013-2014 Valentin Gosu.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use std::char;
use idna::uts46;
use test::TestFn;
/// Register one dynamic test per data line of `IdnaTest.txt`.
///
/// Each data line holds `;`-separated fields: test type, source, expected
/// ToUnicode result, expected ToASCII result and an optional NV8 marker.
/// Lines prefixed with `XFAIL` are parsed but not registered.
/// Each registered test runs `uts46::to_ascii` on the unescaped source and
/// compares the outcome with the expected ToASCII field.
pub fn collect_tests<F: FnMut(String, TestFn)>(add_test: &mut F) {
// http://www.unicode.org/Public/idna/latest/IdnaTest.txt
for (i, line) in include_str!("IdnaTest.txt").lines().enumerate() {
// Skip blank lines and lines that are entirely a comment.
if line == "" || line.starts_with("#") {
continue
}
// Remove comments
let mut line = match line.find("#") {
Some(index) => &line[0..index],
None => line
};
let mut expected_failure = false;
if line.starts_with("XFAIL") {
expected_failure = true;
// Drop the 5-byte "XFAIL" marker.
line = &line[5..line.len()];
};
// Fields are taken in order; a line with fewer than four fields panics here.
let mut pieces = line.split(';').map(|x| x.trim()).collect::<Vec<&str>>();
let test_type = pieces.remove(0);
let original = pieces.remove(0);
let source = unescape(original);
let to_unicode = pieces.remove(0);
let to_ascii = pieces.remove(0);
let nv8 = if pieces.len() > 0 { pieces.remove(0) } else { "" };
if expected_failure {
continue;
}
let test_name = format!("UTS #46 line {}", i + 1);
add_test(test_name, TestFn::dyn_test_fn(move || {
// Transitional processing is used only when the test type is "T".
let result = uts46::to_ascii(&source, uts46::Flags {
use_std3_ascii_rules: true,
transitional_processing: test_type == "T",
verify_dns_length: true,
});
// An expected value starting with "[" means an error is expected.
if to_ascii.starts_with("[") {
if to_ascii.starts_with("[C") {
// http://unicode.org/reports/tr46/#Deviations
// applications that perform IDNA2008 lookup are not required to check
// for these contexts
return;
}
let res = result.ok();
assert!(res == None, "Expected error. result: {} | original: {} | source: {}",
res.unwrap(), original, source);
return;
}
// An empty ToASCII field falls back to the ToUnicode field, and an empty
// ToUnicode field falls back to the source itself.
let to_ascii = if to_ascii.len() > 0 {
to_ascii.to_string()
} else {
if to_unicode.len() > 0 {
to_unicode.to_string()
} else {
source.clone()
}
};
if nv8 == "NV8" {
// This result isn't valid under IDNA2008. Skip it
return;
}
assert!(result.is_ok(), "Couldn't parse {} | original: {} | error: {:?}",
source, original, result.err());
let output = result.ok().unwrap();
assert!(output == to_ascii, "result: {} | expected: {} | original: {} | source: {}",
output, to_ascii, original, source);
}))
}
}
/// Expand the backslash escapes used in the test data.
///
/// `\\` becomes a single backslash and `\uXXXX` (exactly four hex digits)
/// becomes the corresponding code point. A `\uXXXX` value that is not a valid
/// `char` is written back as `\u` followed by its four digits in upper-case hex.
///
/// Panics on a truncated escape, a non-hex digit, or any other escape character.
fn unescape(input: &str) -> String {
    let mut output = String::new();
    let mut chars = input.chars();
    while let Some(c) = chars.next() {
        if c != '\\' {
            output.push(c);
            continue;
        }
        match chars.next().unwrap() {
            '\\' => output.push('\\'),
            'u' => {
                // Read the four hex digits in order; any missing or invalid one panics.
                let mut digits = [0u32; 4];
                for digit in digits.iter_mut() {
                    *digit = chars.next().unwrap().to_digit(16).unwrap();
                }
                let value = digits.iter().fold(0, |acc, &digit| acc * 16 + digit);
                match char::from_u32(value) {
                    Some(decoded) => output.push(decoded),
                    None => {
                        let literal = format!("\\u{:X}{:X}{:X}{:X}",
                                              digits[0], digits[1], digits[2], digits[3]);
                        output.push_str(&literal);
                    }
                }
            }
            _ => panic!("Invalid test data input"),
        }
    }
    output
}

View File

@ -1 +0,0 @@
{"files":{".cargo-ok":"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855","Cargo.toml":"a55cb9efcb3820aa9fcdff157bffaa287988ea9d4cec33e638fa7fdd978c7742","lib.rs":"d927e99fc4fd88b0e30b44580384b411e63bc504e9953ac7cc85029437372266"},"package":"bcc3ad8109fa4b522f9b0cd81440422781f564aaf8c195de6b9d6642177ad0dd"}

View File

View File

@ -1,12 +0,0 @@
[package]
name = "matches"
version = "0.1.3"
authors = ["Simon Sapin <simon.sapin@exyr.org>"]
license = "MIT"
repository = "https://github.com/SimonSapin/rust-std-candidates"
description = "A macro to evaluate, as a boolean, whether an expression matches a pattern."
[lib]
name = "matches"
path = "lib.rs"
doctest = false

View File

@ -1,62 +0,0 @@
/// Evaluate, as a boolean, whether an expression matches a pattern.
///
/// Expands to a `match` on `$expression` that yields `true` for the given
/// pattern tokens and `false` for anything else. The pattern is passed through
/// as raw tokens, so it may use `|` alternatives and an `if` guard.
#[macro_export]
macro_rules! matches {
($expression:expr, $($pattern:tt)+) => {
_matches_tt_as_expr_hack! {
match $expression {
$($pattern)+ => true,
_ => false
}
}
}
}
/// Identity macro: takes a single expression and expands to it unchanged.
///
/// The other macros in this file wrap their generated `match` in it to
/// work around "error: unexpected token: `an interpolated tt`".
/// NOTE(review): presumably this works by forcing the tokens to be parsed as
/// one expression; confirm against the compiler version in use.
#[macro_export]
macro_rules! _matches_tt_as_expr_hack {
($value:expr) => ($value)
}
/// Assert that an expression matches a pattern.
///
/// Expands to a `match` on `$expression` that does nothing when the pattern
/// tokens match and otherwise panics with a message containing the `Debug`
/// output of the value and the stringified pattern. The value therefore has to
/// implement `Debug`.
#[macro_export]
macro_rules! assert_matches {
($expression:expr, $($pattern:tt)+) => {
_matches_tt_as_expr_hack! {
match $expression {
$($pattern)+ => (),
ref e => panic!("assertion failed: `{:?}` does not match `{}`", e, stringify!($($pattern)+)),
}
}
}
}
/// Like `assert_matches!`, but the check only runs when `debug_assertions`
/// are enabled. All arguments are forwarded to `assert_matches!` unchanged.
#[macro_export]
macro_rules! debug_assert_matches {
($($arg:tt)*) => (if cfg!(debug_assertions) { assert_matches!($($arg)*); })
}
// `matches!` accepts a binding with an `if` guard, and nests inside that guard
// with `|` alternatives and a byte range pattern.
#[test]
fn matches_works() {
let foo = Some("-12");
assert!(matches!(foo, Some(bar) if
matches!(bar.as_bytes()[0], b'+' | b'-') &&
matches!(bar.as_bytes()[1], b'0'...b'9')
));
}
// `assert_matches!` does not panic when the guarded pattern matches.
#[test]
fn assert_matches_works() {
let foo = Some("-12");
assert_matches!(foo, Some(bar) if
matches!(bar.as_bytes()[0], b'+' | b'-') &&
matches!(bar.as_bytes()[1], b'0'...b'9')
);
}
// `assert_matches!` panics when the guard rejects the value ('A' is not a
// digit), and the panic message starts with the `Debug` output of the value.
#[test]
#[should_panic(expected = "assertion failed: `Some(\"-AB\")` does not match ")]
fn assert_matches_panics() {
let foo = Some("-AB");
assert_matches!(foo, Some(bar) if
matches!(bar.as_bytes()[0], b'+' | b'-') &&
matches!(bar.as_bytes()[1], b'0'...b'9')
);
}

View File

@ -1 +0,0 @@
{"files":{".cargo-ok":"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",".gitignore":"f39078a8fd87a9cdecaf02e9f817d2e3320cc150c557faf0abf39791e5ac4da1",".travis.yml":"5c3f9b92ab6ca79a98400b8c344f3a06fcc096b5df6f49fbae1a83dceb540170","AUTHORS":"6029da5a270f7047db642cfdaa521acdfd93dcae9f9cd02dc1bd43dfa526fc18","COPYRIGHT":"edb20b474f6cbd4f4db066b54a9e0f687d0009d309412a63431189b59b8e2a07","Cargo.toml":"d08a56cbe64f6b2e82c0f32cf5bdd4f8e64acc9cc5ecfb610a5f65c2c0b2f4b8","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"7b63ecd5f1902af1b63729947373683c32745c16a10e8e6292e2e2dcd7e90ae0","README.md":"e764abd9a6abbe9517dd9dedc003dd64594351282437a509fec9362ef136686d","src/BidiCharacterTest.txt":"baadfd1464e5b16586cf561dc6f0bc1bcb204efbd88b0fded7f197d106c8dd00","src/BidiTest.txt":"4697ad750044c2c05004350eee3f3ac2c9543615b9b43a02c235d8ff89a9b951","src/lib.rs":"228c34bd2e011b8d7cd7438f0d2e416b3ce80cf2dea516ad35394850d12a273a","src/tables.rs":"905aabf453f3a997fcb8784bbc2271ff61d587a8ce0492d140f144f475da0ddd","tools/generate.py":"30aaf9c8c4cf7c164d3b98af83159ba195b6fbf26135f0bacde1c4e225d8845d"},"package":"c1f7ceb96afdfeedee42bade65a0d585a6a0106f681b6749c8ff4daa8df30b3f"}

View File

@ -1,4 +0,0 @@
Cargo.lock
target
src/ReadMe.txt
src/UnicodeData.txt

View File

@ -1,9 +0,0 @@
sudo: false
language: rust
rust:
- nightly
- beta
- stable
notifications:
webhooks: http://build.servo.org:54856/travis

View File

@ -1,3 +0,0 @@
This software was written by the following people:
Matt Brubeck <mbrubeck@limpet.net>

View File

@ -1,8 +0,0 @@
This project is copyright 2015, The Servo Project Developers (given in the
file AUTHORS).
Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
http://www.apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
http://opensource.org/licenses/MIT>, at your option. All files in the project
carrying such notice may not be copied, modified, or distributed except
according to those terms.

View File

@ -1,15 +0,0 @@
[package]
name = "unicode-bidi"
version = "0.2.3"
authors = ["The Servo Project Developers"]
license = "MIT / Apache-2.0"
description = "Implementation of the Unicode Bidirectional Algorithm"
repository = "https://github.com/servo/unicode-bidi"
documentation = "http://doc.servo.org/unicode_bidi/"
keywords = ["rtl", "unicode", "text", "layout", "bidi"]
[lib]
name = "unicode_bidi"
[dependencies]
matches = "0.1"

View File

@ -1,201 +0,0 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@ -1,25 +0,0 @@
Copyright (c) 2015 The Rust Project Developers
Permission is hereby granted, free of charge, to any
person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the
Software without restriction, including without
limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software
is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice
shall be included in all copies or substantial portions
of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

View File

@ -1,11 +0,0 @@
# unicode-bidi
This crate implements the [Unicode Bidirectional Algorithm][tr9] for display
of mixed right-to-left and left-to-right text. It is written in safe Rust,
compatible with the current stable release.
[Documentation](http://doc.servo.org/unicode_bidi/)
[![Build Status](https://travis-ci.org/servo/unicode-bidi.svg?branch=master)](https://travis-ci.org/servo/unicode-bidi)
[tr9]: http://www.unicode.org/reports/tr9/

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,477 +0,0 @@
// NOTE:
// The following code was generated by "tools/generate.py". Do not edit directly.
#![allow(missing_docs, non_upper_case_globals, non_snake_case)]
/// The version of [Unicode](http://www.unicode.org/)
/// that the `bidi_class` function is based on,
/// given as a three-component tuple (currently `(8, 0, 0)`).
pub const UNICODE_VERSION: (u64, u64, u64) = (8, 0, 0);
// Re-export every `BidiClass` variant so that they can be named unqualified
// (`L`, `NSM`, ...), which is how `bidi_class_table` in this file refers to them.
pub use self::BidiClass::*;
#[allow(non_camel_case_types)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
/// Represents the Unicode character property **Bidi_Class**, also known as
/// the *bidirectional character type*.
///
/// Use the `bidi_class` function to look up the `BidiClass` of a code point.
///
/// Variant names are the short property-value aliases; each variant's comment
/// gives the corresponding long name and one example range taken from
/// `bidi_class_table`.
///
/// <http://www.unicode.org/reports/tr9/#Bidirectional_Character_Types>
pub enum BidiClass {
/// Arabic_Letter (e.g. U+0608).
AL,
/// Arabic_Number (e.g. U+0660..U+0669).
AN,
/// Paragraph_Separator (e.g. U+000A).
B,
/// Boundary_Neutral (e.g. U+0000..U+0008).
BN,
/// Common_Separator (e.g. U+002C).
CS,
/// European_Number (e.g. U+0030..U+0039).
EN,
/// European_Separator (e.g. U+002B).
ES,
/// European_Terminator (e.g. U+0023..U+0025).
ET,
/// First_Strong_Isolate (U+2068).
FSI,
/// Left_To_Right (e.g. U+0041..U+005A); also the value returned for code
/// points that appear in no table range.
L,
/// Left_To_Right_Embedding (U+202A).
LRE,
/// Left_To_Right_Isolate (U+2066).
LRI,
/// Left_To_Right_Override (U+202D).
LRO,
/// Nonspacing_Mark (e.g. U+0300..U+036F).
NSM,
/// Other_Neutral (e.g. U+0021..U+0022).
ON,
/// Pop_Directional_Format (U+202C).
PDF,
/// Pop_Directional_Isolate (U+2069).
PDI,
/// Right_To_Left (e.g. U+05BE).
R,
/// Right_To_Left_Embedding (U+202B).
RLE,
/// Right_To_Left_Isolate (U+2067).
RLI,
/// Right_To_Left_Override (U+202E).
RLO,
/// Segment_Separator (e.g. U+0009).
S,
/// White_Space (e.g. U+0020).
WS,
}
/// Looks up `c` in `r`, a table of inclusive `(lo, hi, class)` ranges, and
/// returns the class of the range that contains it.
///
/// The lookup is a binary search, so `r` is expected to be sorted by code point
/// with non-overlapping ranges.
///
/// A `char` that lies in no range yields `L`, following
/// UCD/extracted/DerivedBidiClass.txt: "All code points not explicitly listed
/// for Bidi_Class have the value Left_To_Right (L)."
fn bsearch_range_value_table(c: char, r: &'static [(char, char, BidiClass)]) -> BidiClass {
    use ::std::cmp::Ordering;

    // The comparator reports where a table range sits relative to `c`.
    let position = r.binary_search_by(|range| {
        let (lo, hi, _) = *range;
        if hi < c {
            // The whole range is below `c`.
            Ordering::Less
        } else if c < lo {
            // The whole range is above `c`.
            Ordering::Greater
        } else {
            Ordering::Equal
        }
    });

    position.ok().map_or(L, |idx| r[idx].2)
}
/// Find the `BidiClass` of a single char.
///
/// Looks `c` up in `bidi_class_table` through `bsearch_range_value_table`;
/// a `char` that falls in no table range is reported as `L`.
pub fn bidi_class(c: char) -> BidiClass {
bsearch_range_value_table(c, bidi_class_table)
}
const bidi_class_table: &'static [(char, char, BidiClass)] = &[
('\u{0}', '\u{8}', BN), ('\u{9}', '\u{9}', S), ('\u{a}', '\u{a}', B), ('\u{b}', '\u{b}', S),
('\u{c}', '\u{c}', WS), ('\u{d}', '\u{d}', B), ('\u{e}', '\u{1b}', BN), ('\u{1c}', '\u{1e}',
B), ('\u{1f}', '\u{1f}', S), ('\u{20}', '\u{20}', WS), ('\u{21}', '\u{22}', ON), ('\u{23}',
'\u{25}', ET), ('\u{26}', '\u{2a}', ON), ('\u{2b}', '\u{2b}', ES), ('\u{2c}', '\u{2c}', CS),
('\u{2d}', '\u{2d}', ES), ('\u{2e}', '\u{2f}', CS), ('\u{30}', '\u{39}', EN), ('\u{3a}',
'\u{3a}', CS), ('\u{3b}', '\u{40}', ON), ('\u{41}', '\u{5a}', L), ('\u{5b}', '\u{60}', ON),
('\u{61}', '\u{7a}', L), ('\u{7b}', '\u{7e}', ON), ('\u{7f}', '\u{84}', BN), ('\u{85}',
'\u{85}', B), ('\u{86}', '\u{9f}', BN), ('\u{a0}', '\u{a0}', CS), ('\u{a1}', '\u{a1}', ON),
('\u{a2}', '\u{a5}', ET), ('\u{a6}', '\u{a9}', ON), ('\u{aa}', '\u{aa}', L), ('\u{ab}',
'\u{ac}', ON), ('\u{ad}', '\u{ad}', BN), ('\u{ae}', '\u{af}', ON), ('\u{b0}', '\u{b1}', ET),
('\u{b2}', '\u{b3}', EN), ('\u{b4}', '\u{b4}', ON), ('\u{b5}', '\u{b5}', L), ('\u{b6}',
'\u{b8}', ON), ('\u{b9}', '\u{b9}', EN), ('\u{ba}', '\u{ba}', L), ('\u{bb}', '\u{bf}', ON),
('\u{c0}', '\u{d6}', L), ('\u{d7}', '\u{d7}', ON), ('\u{d8}', '\u{f6}', L), ('\u{f7}',
'\u{f7}', ON), ('\u{f8}', '\u{2b8}', L), ('\u{2b9}', '\u{2ba}', ON), ('\u{2bb}', '\u{2c1}',
L), ('\u{2c2}', '\u{2cf}', ON), ('\u{2d0}', '\u{2d1}', L), ('\u{2d2}', '\u{2df}', ON),
('\u{2e0}', '\u{2e4}', L), ('\u{2e5}', '\u{2ed}', ON), ('\u{2ee}', '\u{2ee}', L),
('\u{2ef}', '\u{2ff}', ON), ('\u{300}', '\u{36f}', NSM), ('\u{370}', '\u{373}', L),
('\u{374}', '\u{375}', ON), ('\u{376}', '\u{377}', L), ('\u{37a}', '\u{37d}', L),
('\u{37e}', '\u{37e}', ON), ('\u{37f}', '\u{37f}', L), ('\u{384}', '\u{385}', ON),
('\u{386}', '\u{386}', L), ('\u{387}', '\u{387}', ON), ('\u{388}', '\u{38a}', L),
('\u{38c}', '\u{38c}', L), ('\u{38e}', '\u{3a1}', L), ('\u{3a3}', '\u{3f5}', L), ('\u{3f6}',
'\u{3f6}', ON), ('\u{3f7}', '\u{482}', L), ('\u{483}', '\u{489}', NSM), ('\u{48a}',
'\u{52f}', L), ('\u{531}', '\u{556}', L), ('\u{559}', '\u{55f}', L), ('\u{561}', '\u{587}',
L), ('\u{589}', '\u{589}', L), ('\u{58a}', '\u{58a}', ON), ('\u{58d}', '\u{58e}', ON),
('\u{58f}', '\u{58f}', ET), ('\u{590}', '\u{590}', R), ('\u{591}', '\u{5bd}', NSM),
('\u{5be}', '\u{5be}', R), ('\u{5bf}', '\u{5bf}', NSM), ('\u{5c0}', '\u{5c0}', R),
('\u{5c1}', '\u{5c2}', NSM), ('\u{5c3}', '\u{5c3}', R), ('\u{5c4}', '\u{5c5}', NSM),
('\u{5c6}', '\u{5c6}', R), ('\u{5c7}', '\u{5c7}', NSM), ('\u{5c8}', '\u{5ff}', R),
('\u{600}', '\u{605}', AN), ('\u{606}', '\u{607}', ON), ('\u{608}', '\u{608}', AL),
('\u{609}', '\u{60a}', ET), ('\u{60b}', '\u{60b}', AL), ('\u{60c}', '\u{60c}', CS),
('\u{60d}', '\u{60d}', AL), ('\u{60e}', '\u{60f}', ON), ('\u{610}', '\u{61a}', NSM),
('\u{61b}', '\u{64a}', AL), ('\u{64b}', '\u{65f}', NSM), ('\u{660}', '\u{669}', AN),
('\u{66a}', '\u{66a}', ET), ('\u{66b}', '\u{66c}', AN), ('\u{66d}', '\u{66f}', AL),
('\u{670}', '\u{670}', NSM), ('\u{671}', '\u{6d5}', AL), ('\u{6d6}', '\u{6dc}', NSM),
('\u{6dd}', '\u{6dd}', AN), ('\u{6de}', '\u{6de}', ON), ('\u{6df}', '\u{6e4}', NSM),
('\u{6e5}', '\u{6e6}', AL), ('\u{6e7}', '\u{6e8}', NSM), ('\u{6e9}', '\u{6e9}', ON),
('\u{6ea}', '\u{6ed}', NSM), ('\u{6ee}', '\u{6ef}', AL), ('\u{6f0}', '\u{6f9}', EN),
('\u{6fa}', '\u{710}', AL), ('\u{711}', '\u{711}', NSM), ('\u{712}', '\u{72f}', AL),
('\u{730}', '\u{74a}', NSM), ('\u{74b}', '\u{7a5}', AL), ('\u{7a6}', '\u{7b0}', NSM),
('\u{7b1}', '\u{7bf}', AL), ('\u{7c0}', '\u{7ea}', R), ('\u{7eb}', '\u{7f3}', NSM),
('\u{7f4}', '\u{7f5}', R), ('\u{7f6}', '\u{7f9}', ON), ('\u{7fa}', '\u{815}', R),
('\u{816}', '\u{819}', NSM), ('\u{81a}', '\u{81a}', R), ('\u{81b}', '\u{823}', NSM),
('\u{824}', '\u{824}', R), ('\u{825}', '\u{827}', NSM), ('\u{828}', '\u{828}', R),
('\u{829}', '\u{82d}', NSM), ('\u{82e}', '\u{858}', R), ('\u{859}', '\u{85b}', NSM),
('\u{85c}', '\u{89f}', R), ('\u{8a0}', '\u{8e2}', AL), ('\u{8e3}', '\u{902}', NSM),
('\u{903}', '\u{939}', L), ('\u{93a}', '\u{93a}', NSM), ('\u{93b}', '\u{93b}', L),
('\u{93c}', '\u{93c}', NSM), ('\u{93d}', '\u{940}', L), ('\u{941}', '\u{948}', NSM),
('\u{949}', '\u{94c}', L), ('\u{94d}', '\u{94d}', NSM), ('\u{94e}', '\u{950}', L),
('\u{951}', '\u{957}', NSM), ('\u{958}', '\u{961}', L), ('\u{962}', '\u{963}', NSM),
('\u{964}', '\u{980}', L), ('\u{981}', '\u{981}', NSM), ('\u{982}', '\u{983}', L),
('\u{985}', '\u{98c}', L), ('\u{98f}', '\u{990}', L), ('\u{993}', '\u{9a8}', L), ('\u{9aa}',
'\u{9b0}', L), ('\u{9b2}', '\u{9b2}', L), ('\u{9b6}', '\u{9b9}', L), ('\u{9bc}', '\u{9bc}',
NSM), ('\u{9bd}', '\u{9c0}', L), ('\u{9c1}', '\u{9c4}', NSM), ('\u{9c7}', '\u{9c8}', L),
('\u{9cb}', '\u{9cc}', L), ('\u{9cd}', '\u{9cd}', NSM), ('\u{9ce}', '\u{9ce}', L),
('\u{9d7}', '\u{9d7}', L), ('\u{9dc}', '\u{9dd}', L), ('\u{9df}', '\u{9e1}', L), ('\u{9e2}',
'\u{9e3}', NSM), ('\u{9e6}', '\u{9f1}', L), ('\u{9f2}', '\u{9f3}', ET), ('\u{9f4}',
'\u{9fa}', L), ('\u{9fb}', '\u{9fb}', ET), ('\u{a01}', '\u{a02}', NSM), ('\u{a03}',
'\u{a03}', L), ('\u{a05}', '\u{a0a}', L), ('\u{a0f}', '\u{a10}', L), ('\u{a13}', '\u{a28}',
L), ('\u{a2a}', '\u{a30}', L), ('\u{a32}', '\u{a33}', L), ('\u{a35}', '\u{a36}', L),
('\u{a38}', '\u{a39}', L), ('\u{a3c}', '\u{a3c}', NSM), ('\u{a3e}', '\u{a40}', L),
('\u{a41}', '\u{a42}', NSM), ('\u{a47}', '\u{a48}', NSM), ('\u{a4b}', '\u{a4d}', NSM),
('\u{a51}', '\u{a51}', NSM), ('\u{a59}', '\u{a5c}', L), ('\u{a5e}', '\u{a5e}', L),
('\u{a66}', '\u{a6f}', L), ('\u{a70}', '\u{a71}', NSM), ('\u{a72}', '\u{a74}', L),
('\u{a75}', '\u{a75}', NSM), ('\u{a81}', '\u{a82}', NSM), ('\u{a83}', '\u{a83}', L),
('\u{a85}', '\u{a8d}', L), ('\u{a8f}', '\u{a91}', L), ('\u{a93}', '\u{aa8}', L), ('\u{aaa}',
'\u{ab0}', L), ('\u{ab2}', '\u{ab3}', L), ('\u{ab5}', '\u{ab9}', L), ('\u{abc}', '\u{abc}',
NSM), ('\u{abd}', '\u{ac0}', L), ('\u{ac1}', '\u{ac5}', NSM), ('\u{ac7}', '\u{ac8}', NSM),
('\u{ac9}', '\u{ac9}', L), ('\u{acb}', '\u{acc}', L), ('\u{acd}', '\u{acd}', NSM),
('\u{ad0}', '\u{ad0}', L), ('\u{ae0}', '\u{ae1}', L), ('\u{ae2}', '\u{ae3}', NSM),
('\u{ae6}', '\u{af0}', L), ('\u{af1}', '\u{af1}', ET), ('\u{af9}', '\u{af9}', L),
('\u{b01}', '\u{b01}', NSM), ('\u{b02}', '\u{b03}', L), ('\u{b05}', '\u{b0c}', L),
('\u{b0f}', '\u{b10}', L), ('\u{b13}', '\u{b28}', L), ('\u{b2a}', '\u{b30}', L), ('\u{b32}',
'\u{b33}', L), ('\u{b35}', '\u{b39}', L), ('\u{b3c}', '\u{b3c}', NSM), ('\u{b3d}',
'\u{b3e}', L), ('\u{b3f}', '\u{b3f}', NSM), ('\u{b40}', '\u{b40}', L), ('\u{b41}',
'\u{b44}', NSM), ('\u{b47}', '\u{b48}', L), ('\u{b4b}', '\u{b4c}', L), ('\u{b4d}',
'\u{b4d}', NSM), ('\u{b56}', '\u{b56}', NSM), ('\u{b57}', '\u{b57}', L), ('\u{b5c}',
'\u{b5d}', L), ('\u{b5f}', '\u{b61}', L), ('\u{b62}', '\u{b63}', NSM), ('\u{b66}',
'\u{b77}', L), ('\u{b82}', '\u{b82}', NSM), ('\u{b83}', '\u{b83}', L), ('\u{b85}',
'\u{b8a}', L), ('\u{b8e}', '\u{b90}', L), ('\u{b92}', '\u{b95}', L), ('\u{b99}', '\u{b9a}',
L), ('\u{b9c}', '\u{b9c}', L), ('\u{b9e}', '\u{b9f}', L), ('\u{ba3}', '\u{ba4}', L),
('\u{ba8}', '\u{baa}', L), ('\u{bae}', '\u{bb9}', L), ('\u{bbe}', '\u{bbf}', L), ('\u{bc0}',
'\u{bc0}', NSM), ('\u{bc1}', '\u{bc2}', L), ('\u{bc6}', '\u{bc8}', L), ('\u{bca}',
'\u{bcc}', L), ('\u{bcd}', '\u{bcd}', NSM), ('\u{bd0}', '\u{bd0}', L), ('\u{bd7}',
'\u{bd7}', L), ('\u{be6}', '\u{bf2}', L), ('\u{bf3}', '\u{bf8}', ON), ('\u{bf9}', '\u{bf9}',
ET), ('\u{bfa}', '\u{bfa}', ON), ('\u{c00}', '\u{c00}', NSM), ('\u{c01}', '\u{c03}', L),
('\u{c05}', '\u{c0c}', L), ('\u{c0e}', '\u{c10}', L), ('\u{c12}', '\u{c28}', L), ('\u{c2a}',
'\u{c39}', L), ('\u{c3d}', '\u{c3d}', L), ('\u{c3e}', '\u{c40}', NSM), ('\u{c41}',
'\u{c44}', L), ('\u{c46}', '\u{c48}', NSM), ('\u{c4a}', '\u{c4d}', NSM), ('\u{c55}',
'\u{c56}', NSM), ('\u{c58}', '\u{c5a}', L), ('\u{c60}', '\u{c61}', L), ('\u{c62}',
'\u{c63}', NSM), ('\u{c66}', '\u{c6f}', L), ('\u{c78}', '\u{c7e}', ON), ('\u{c7f}',
'\u{c7f}', L), ('\u{c81}', '\u{c81}', NSM), ('\u{c82}', '\u{c83}', L), ('\u{c85}',
'\u{c8c}', L), ('\u{c8e}', '\u{c90}', L), ('\u{c92}', '\u{ca8}', L), ('\u{caa}', '\u{cb3}',
L), ('\u{cb5}', '\u{cb9}', L), ('\u{cbc}', '\u{cbc}', NSM), ('\u{cbd}', '\u{cc4}', L),
('\u{cc6}', '\u{cc8}', L), ('\u{cca}', '\u{ccb}', L), ('\u{ccc}', '\u{ccd}', NSM),
('\u{cd5}', '\u{cd6}', L), ('\u{cde}', '\u{cde}', L), ('\u{ce0}', '\u{ce1}', L), ('\u{ce2}',
'\u{ce3}', NSM), ('\u{ce6}', '\u{cef}', L), ('\u{cf1}', '\u{cf2}', L), ('\u{d01}',
'\u{d01}', NSM), ('\u{d02}', '\u{d03}', L), ('\u{d05}', '\u{d0c}', L), ('\u{d0e}',
'\u{d10}', L), ('\u{d12}', '\u{d3a}', L), ('\u{d3d}', '\u{d40}', L), ('\u{d41}', '\u{d44}',
NSM), ('\u{d46}', '\u{d48}', L), ('\u{d4a}', '\u{d4c}', L), ('\u{d4d}', '\u{d4d}', NSM),
('\u{d4e}', '\u{d4e}', L), ('\u{d57}', '\u{d57}', L), ('\u{d5f}', '\u{d61}', L), ('\u{d62}',
'\u{d63}', NSM), ('\u{d66}', '\u{d75}', L), ('\u{d79}', '\u{d7f}', L), ('\u{d82}',
'\u{d83}', L), ('\u{d85}', '\u{d96}', L), ('\u{d9a}', '\u{db1}', L), ('\u{db3}', '\u{dbb}',
L), ('\u{dbd}', '\u{dbd}', L), ('\u{dc0}', '\u{dc6}', L), ('\u{dca}', '\u{dca}', NSM),
('\u{dcf}', '\u{dd1}', L), ('\u{dd2}', '\u{dd4}', NSM), ('\u{dd6}', '\u{dd6}', NSM),
('\u{dd8}', '\u{ddf}', L), ('\u{de6}', '\u{def}', L), ('\u{df2}', '\u{df4}', L), ('\u{e01}',
'\u{e30}', L), ('\u{e31}', '\u{e31}', NSM), ('\u{e32}', '\u{e33}', L), ('\u{e34}',
'\u{e3a}', NSM), ('\u{e3f}', '\u{e3f}', ET), ('\u{e40}', '\u{e46}', L), ('\u{e47}',
'\u{e4e}', NSM), ('\u{e4f}', '\u{e5b}', L), ('\u{e81}', '\u{e82}', L), ('\u{e84}',
'\u{e84}', L), ('\u{e87}', '\u{e88}', L), ('\u{e8a}', '\u{e8a}', L), ('\u{e8d}', '\u{e8d}',
L), ('\u{e94}', '\u{e97}', L), ('\u{e99}', '\u{e9f}', L), ('\u{ea1}', '\u{ea3}', L),
('\u{ea5}', '\u{ea5}', L), ('\u{ea7}', '\u{ea7}', L), ('\u{eaa}', '\u{eab}', L), ('\u{ead}',
'\u{eb0}', L), ('\u{eb1}', '\u{eb1}', NSM), ('\u{eb2}', '\u{eb3}', L), ('\u{eb4}',
'\u{eb9}', NSM), ('\u{ebb}', '\u{ebc}', NSM), ('\u{ebd}', '\u{ebd}', L), ('\u{ec0}',
'\u{ec4}', L), ('\u{ec6}', '\u{ec6}', L), ('\u{ec8}', '\u{ecd}', NSM), ('\u{ed0}',
'\u{ed9}', L), ('\u{edc}', '\u{edf}', L), ('\u{f00}', '\u{f17}', L), ('\u{f18}', '\u{f19}',
NSM), ('\u{f1a}', '\u{f34}', L), ('\u{f35}', '\u{f35}', NSM), ('\u{f36}', '\u{f36}', L),
('\u{f37}', '\u{f37}', NSM), ('\u{f38}', '\u{f38}', L), ('\u{f39}', '\u{f39}', NSM),
('\u{f3a}', '\u{f3d}', ON), ('\u{f3e}', '\u{f47}', L), ('\u{f49}', '\u{f6c}', L),
('\u{f71}', '\u{f7e}', NSM), ('\u{f7f}', '\u{f7f}', L), ('\u{f80}', '\u{f84}', NSM),
('\u{f85}', '\u{f85}', L), ('\u{f86}', '\u{f87}', NSM), ('\u{f88}', '\u{f8c}', L),
('\u{f8d}', '\u{f97}', NSM), ('\u{f99}', '\u{fbc}', NSM), ('\u{fbe}', '\u{fc5}', L),
('\u{fc6}', '\u{fc6}', NSM), ('\u{fc7}', '\u{fcc}', L), ('\u{fce}', '\u{fda}', L),
('\u{1000}', '\u{102c}', L), ('\u{102d}', '\u{1030}', NSM), ('\u{1031}', '\u{1031}', L),
('\u{1032}', '\u{1037}', NSM), ('\u{1038}', '\u{1038}', L), ('\u{1039}', '\u{103a}', NSM),
('\u{103b}', '\u{103c}', L), ('\u{103d}', '\u{103e}', NSM), ('\u{103f}', '\u{1057}', L),
('\u{1058}', '\u{1059}', NSM), ('\u{105a}', '\u{105d}', L), ('\u{105e}', '\u{1060}', NSM),
('\u{1061}', '\u{1070}', L), ('\u{1071}', '\u{1074}', NSM), ('\u{1075}', '\u{1081}', L),
('\u{1082}', '\u{1082}', NSM), ('\u{1083}', '\u{1084}', L), ('\u{1085}', '\u{1086}', NSM),
('\u{1087}', '\u{108c}', L), ('\u{108d}', '\u{108d}', NSM), ('\u{108e}', '\u{109c}', L),
('\u{109d}', '\u{109d}', NSM), ('\u{109e}', '\u{10c5}', L), ('\u{10c7}', '\u{10c7}', L),
('\u{10cd}', '\u{10cd}', L), ('\u{10d0}', '\u{1248}', L), ('\u{124a}', '\u{124d}', L),
('\u{1250}', '\u{1256}', L), ('\u{1258}', '\u{1258}', L), ('\u{125a}', '\u{125d}', L),
('\u{1260}', '\u{1288}', L), ('\u{128a}', '\u{128d}', L), ('\u{1290}', '\u{12b0}', L),
('\u{12b2}', '\u{12b5}', L), ('\u{12b8}', '\u{12be}', L), ('\u{12c0}', '\u{12c0}', L),
('\u{12c2}', '\u{12c5}', L), ('\u{12c8}', '\u{12d6}', L), ('\u{12d8}', '\u{1310}', L),
('\u{1312}', '\u{1315}', L), ('\u{1318}', '\u{135a}', L), ('\u{135d}', '\u{135f}', NSM),
('\u{1360}', '\u{137c}', L), ('\u{1380}', '\u{138f}', L), ('\u{1390}', '\u{1399}', ON),
('\u{13a0}', '\u{13f5}', L), ('\u{13f8}', '\u{13fd}', L), ('\u{1400}', '\u{1400}', ON),
('\u{1401}', '\u{167f}', L), ('\u{1680}', '\u{1680}', WS), ('\u{1681}', '\u{169a}', L),
('\u{169b}', '\u{169c}', ON), ('\u{16a0}', '\u{16f8}', L), ('\u{1700}', '\u{170c}', L),
('\u{170e}', '\u{1711}', L), ('\u{1712}', '\u{1714}', NSM), ('\u{1720}', '\u{1731}', L),
('\u{1732}', '\u{1734}', NSM), ('\u{1735}', '\u{1736}', L), ('\u{1740}', '\u{1751}', L),
('\u{1752}', '\u{1753}', NSM), ('\u{1760}', '\u{176c}', L), ('\u{176e}', '\u{1770}', L),
('\u{1772}', '\u{1773}', NSM), ('\u{1780}', '\u{17b3}', L), ('\u{17b4}', '\u{17b5}', NSM),
('\u{17b6}', '\u{17b6}', L), ('\u{17b7}', '\u{17bd}', NSM), ('\u{17be}', '\u{17c5}', L),
('\u{17c6}', '\u{17c6}', NSM), ('\u{17c7}', '\u{17c8}', L), ('\u{17c9}', '\u{17d3}', NSM),
('\u{17d4}', '\u{17da}', L), ('\u{17db}', '\u{17db}', ET), ('\u{17dc}', '\u{17dc}', L),
('\u{17dd}', '\u{17dd}', NSM), ('\u{17e0}', '\u{17e9}', L), ('\u{17f0}', '\u{17f9}', ON),
('\u{1800}', '\u{180a}', ON), ('\u{180b}', '\u{180d}', NSM), ('\u{180e}', '\u{180e}', BN),
('\u{1810}', '\u{1819}', L), ('\u{1820}', '\u{1877}', L), ('\u{1880}', '\u{18a8}', L),
('\u{18a9}', '\u{18a9}', NSM), ('\u{18aa}', '\u{18aa}', L), ('\u{18b0}', '\u{18f5}', L),
('\u{1900}', '\u{191e}', L), ('\u{1920}', '\u{1922}', NSM), ('\u{1923}', '\u{1926}', L),
('\u{1927}', '\u{1928}', NSM), ('\u{1929}', '\u{192b}', L), ('\u{1930}', '\u{1931}', L),
('\u{1932}', '\u{1932}', NSM), ('\u{1933}', '\u{1938}', L), ('\u{1939}', '\u{193b}', NSM),
('\u{1940}', '\u{1940}', ON), ('\u{1944}', '\u{1945}', ON), ('\u{1946}', '\u{196d}', L),
('\u{1970}', '\u{1974}', L), ('\u{1980}', '\u{19ab}', L), ('\u{19b0}', '\u{19c9}', L),
('\u{19d0}', '\u{19da}', L), ('\u{19de}', '\u{19ff}', ON), ('\u{1a00}', '\u{1a16}', L),
('\u{1a17}', '\u{1a18}', NSM), ('\u{1a19}', '\u{1a1a}', L), ('\u{1a1b}', '\u{1a1b}', NSM),
('\u{1a1e}', '\u{1a55}', L), ('\u{1a56}', '\u{1a56}', NSM), ('\u{1a57}', '\u{1a57}', L),
('\u{1a58}', '\u{1a5e}', NSM), ('\u{1a60}', '\u{1a60}', NSM), ('\u{1a61}', '\u{1a61}', L),
('\u{1a62}', '\u{1a62}', NSM), ('\u{1a63}', '\u{1a64}', L), ('\u{1a65}', '\u{1a6c}', NSM),
('\u{1a6d}', '\u{1a72}', L), ('\u{1a73}', '\u{1a7c}', NSM), ('\u{1a7f}', '\u{1a7f}', NSM),
('\u{1a80}', '\u{1a89}', L), ('\u{1a90}', '\u{1a99}', L), ('\u{1aa0}', '\u{1aad}', L),
('\u{1ab0}', '\u{1abe}', NSM), ('\u{1b00}', '\u{1b03}', NSM), ('\u{1b04}', '\u{1b33}', L),
('\u{1b34}', '\u{1b34}', NSM), ('\u{1b35}', '\u{1b35}', L), ('\u{1b36}', '\u{1b3a}', NSM),
('\u{1b3b}', '\u{1b3b}', L), ('\u{1b3c}', '\u{1b3c}', NSM), ('\u{1b3d}', '\u{1b41}', L),
('\u{1b42}', '\u{1b42}', NSM), ('\u{1b43}', '\u{1b4b}', L), ('\u{1b50}', '\u{1b6a}', L),
('\u{1b6b}', '\u{1b73}', NSM), ('\u{1b74}', '\u{1b7c}', L), ('\u{1b80}', '\u{1b81}', NSM),
('\u{1b82}', '\u{1ba1}', L), ('\u{1ba2}', '\u{1ba5}', NSM), ('\u{1ba6}', '\u{1ba7}', L),
('\u{1ba8}', '\u{1ba9}', NSM), ('\u{1baa}', '\u{1baa}', L), ('\u{1bab}', '\u{1bad}', NSM),
('\u{1bae}', '\u{1be5}', L), ('\u{1be6}', '\u{1be6}', NSM), ('\u{1be7}', '\u{1be7}', L),
('\u{1be8}', '\u{1be9}', NSM), ('\u{1bea}', '\u{1bec}', L), ('\u{1bed}', '\u{1bed}', NSM),
('\u{1bee}', '\u{1bee}', L), ('\u{1bef}', '\u{1bf1}', NSM), ('\u{1bf2}', '\u{1bf3}', L),
('\u{1bfc}', '\u{1c2b}', L), ('\u{1c2c}', '\u{1c33}', NSM), ('\u{1c34}', '\u{1c35}', L),
('\u{1c36}', '\u{1c37}', NSM), ('\u{1c3b}', '\u{1c49}', L), ('\u{1c4d}', '\u{1c7f}', L),
('\u{1cc0}', '\u{1cc7}', L), ('\u{1cd0}', '\u{1cd2}', NSM), ('\u{1cd3}', '\u{1cd3}', L),
('\u{1cd4}', '\u{1ce0}', NSM), ('\u{1ce1}', '\u{1ce1}', L), ('\u{1ce2}', '\u{1ce8}', NSM),
('\u{1ce9}', '\u{1cec}', L), ('\u{1ced}', '\u{1ced}', NSM), ('\u{1cee}', '\u{1cf3}', L),
('\u{1cf4}', '\u{1cf4}', NSM), ('\u{1cf5}', '\u{1cf6}', L), ('\u{1cf8}', '\u{1cf9}', NSM),
('\u{1d00}', '\u{1dbf}', L), ('\u{1dc0}', '\u{1df5}', NSM), ('\u{1dfc}', '\u{1dff}', NSM),
('\u{1e00}', '\u{1f15}', L), ('\u{1f18}', '\u{1f1d}', L), ('\u{1f20}', '\u{1f45}', L),
('\u{1f48}', '\u{1f4d}', L), ('\u{1f50}', '\u{1f57}', L), ('\u{1f59}', '\u{1f59}', L),
('\u{1f5b}', '\u{1f5b}', L), ('\u{1f5d}', '\u{1f5d}', L), ('\u{1f5f}', '\u{1f7d}', L),
('\u{1f80}', '\u{1fb4}', L), ('\u{1fb6}', '\u{1fbc}', L), ('\u{1fbd}', '\u{1fbd}', ON),
('\u{1fbe}', '\u{1fbe}', L), ('\u{1fbf}', '\u{1fc1}', ON), ('\u{1fc2}', '\u{1fc4}', L),
('\u{1fc6}', '\u{1fcc}', L), ('\u{1fcd}', '\u{1fcf}', ON), ('\u{1fd0}', '\u{1fd3}', L),
('\u{1fd6}', '\u{1fdb}', L), ('\u{1fdd}', '\u{1fdf}', ON), ('\u{1fe0}', '\u{1fec}', L),
('\u{1fed}', '\u{1fef}', ON), ('\u{1ff2}', '\u{1ff4}', L), ('\u{1ff6}', '\u{1ffc}', L),
('\u{1ffd}', '\u{1ffe}', ON), ('\u{2000}', '\u{200a}', WS), ('\u{200b}', '\u{200d}', BN),
('\u{200e}', '\u{200e}', L), ('\u{200f}', '\u{200f}', R), ('\u{2010}', '\u{2027}', ON),
('\u{2028}', '\u{2028}', WS), ('\u{2029}', '\u{2029}', B), ('\u{202a}', '\u{202a}', LRE),
('\u{202b}', '\u{202b}', RLE), ('\u{202c}', '\u{202c}', PDF), ('\u{202d}', '\u{202d}', LRO),
('\u{202e}', '\u{202e}', RLO), ('\u{202f}', '\u{202f}', CS), ('\u{2030}', '\u{2034}', ET),
('\u{2035}', '\u{2043}', ON), ('\u{2044}', '\u{2044}', CS), ('\u{2045}', '\u{205e}', ON),
('\u{205f}', '\u{205f}', WS), ('\u{2060}', '\u{2064}', BN), ('\u{2066}', '\u{2066}', LRI),
('\u{2067}', '\u{2067}', RLI), ('\u{2068}', '\u{2068}', FSI), ('\u{2069}', '\u{2069}', PDI),
('\u{206a}', '\u{206f}', BN), ('\u{2070}', '\u{2070}', EN), ('\u{2071}', '\u{2071}', L),
('\u{2074}', '\u{2079}', EN), ('\u{207a}', '\u{207b}', ES), ('\u{207c}', '\u{207e}', ON),
('\u{207f}', '\u{207f}', L), ('\u{2080}', '\u{2089}', EN), ('\u{208a}', '\u{208b}', ES),
('\u{208c}', '\u{208e}', ON), ('\u{2090}', '\u{209c}', L), ('\u{20a0}', '\u{20cf}', ET),
('\u{20d0}', '\u{20f0}', NSM), ('\u{2100}', '\u{2101}', ON), ('\u{2102}', '\u{2102}', L),
('\u{2103}', '\u{2106}', ON), ('\u{2107}', '\u{2107}', L), ('\u{2108}', '\u{2109}', ON),
('\u{210a}', '\u{2113}', L), ('\u{2114}', '\u{2114}', ON), ('\u{2115}', '\u{2115}', L),
('\u{2116}', '\u{2118}', ON), ('\u{2119}', '\u{211d}', L), ('\u{211e}', '\u{2123}', ON),
('\u{2124}', '\u{2124}', L), ('\u{2125}', '\u{2125}', ON), ('\u{2126}', '\u{2126}', L),
('\u{2127}', '\u{2127}', ON), ('\u{2128}', '\u{2128}', L), ('\u{2129}', '\u{2129}', ON),
('\u{212a}', '\u{212d}', L), ('\u{212e}', '\u{212e}', ET), ('\u{212f}', '\u{2139}', L),
('\u{213a}', '\u{213b}', ON), ('\u{213c}', '\u{213f}', L), ('\u{2140}', '\u{2144}', ON),
('\u{2145}', '\u{2149}', L), ('\u{214a}', '\u{214d}', ON), ('\u{214e}', '\u{214f}', L),
('\u{2150}', '\u{215f}', ON), ('\u{2160}', '\u{2188}', L), ('\u{2189}', '\u{218b}', ON),
('\u{2190}', '\u{2211}', ON), ('\u{2212}', '\u{2212}', ES), ('\u{2213}', '\u{2213}', ET),
('\u{2214}', '\u{2335}', ON), ('\u{2336}', '\u{237a}', L), ('\u{237b}', '\u{2394}', ON),
('\u{2395}', '\u{2395}', L), ('\u{2396}', '\u{23fa}', ON), ('\u{2400}', '\u{2426}', ON),
('\u{2440}', '\u{244a}', ON), ('\u{2460}', '\u{2487}', ON), ('\u{2488}', '\u{249b}', EN),
('\u{249c}', '\u{24e9}', L), ('\u{24ea}', '\u{26ab}', ON), ('\u{26ac}', '\u{26ac}', L),
('\u{26ad}', '\u{27ff}', ON), ('\u{2800}', '\u{28ff}', L), ('\u{2900}', '\u{2b73}', ON),
('\u{2b76}', '\u{2b95}', ON), ('\u{2b98}', '\u{2bb9}', ON), ('\u{2bbd}', '\u{2bc8}', ON),
('\u{2bca}', '\u{2bd1}', ON), ('\u{2bec}', '\u{2bef}', ON), ('\u{2c00}', '\u{2c2e}', L),
('\u{2c30}', '\u{2c5e}', L), ('\u{2c60}', '\u{2ce4}', L), ('\u{2ce5}', '\u{2cea}', ON),
('\u{2ceb}', '\u{2cee}', L), ('\u{2cef}', '\u{2cf1}', NSM), ('\u{2cf2}', '\u{2cf3}', L),
('\u{2cf9}', '\u{2cff}', ON), ('\u{2d00}', '\u{2d25}', L), ('\u{2d27}', '\u{2d27}', L),
('\u{2d2d}', '\u{2d2d}', L), ('\u{2d30}', '\u{2d67}', L), ('\u{2d6f}', '\u{2d70}', L),
('\u{2d7f}', '\u{2d7f}', NSM), ('\u{2d80}', '\u{2d96}', L), ('\u{2da0}', '\u{2da6}', L),
('\u{2da8}', '\u{2dae}', L), ('\u{2db0}', '\u{2db6}', L), ('\u{2db8}', '\u{2dbe}', L),
('\u{2dc0}', '\u{2dc6}', L), ('\u{2dc8}', '\u{2dce}', L), ('\u{2dd0}', '\u{2dd6}', L),
('\u{2dd8}', '\u{2dde}', L), ('\u{2de0}', '\u{2dff}', NSM), ('\u{2e00}', '\u{2e42}', ON),
('\u{2e80}', '\u{2e99}', ON), ('\u{2e9b}', '\u{2ef3}', ON), ('\u{2f00}', '\u{2fd5}', ON),
('\u{2ff0}', '\u{2ffb}', ON), ('\u{3000}', '\u{3000}', WS), ('\u{3001}', '\u{3004}', ON),
('\u{3005}', '\u{3007}', L), ('\u{3008}', '\u{3020}', ON), ('\u{3021}', '\u{3029}', L),
('\u{302a}', '\u{302d}', NSM), ('\u{302e}', '\u{302f}', L), ('\u{3030}', '\u{3030}', ON),
('\u{3031}', '\u{3035}', L), ('\u{3036}', '\u{3037}', ON), ('\u{3038}', '\u{303c}', L),
('\u{303d}', '\u{303f}', ON), ('\u{3041}', '\u{3096}', L), ('\u{3099}', '\u{309a}', NSM),
('\u{309b}', '\u{309c}', ON), ('\u{309d}', '\u{309f}', L), ('\u{30a0}', '\u{30a0}', ON),
('\u{30a1}', '\u{30fa}', L), ('\u{30fb}', '\u{30fb}', ON), ('\u{30fc}', '\u{30ff}', L),
('\u{3105}', '\u{312d}', L), ('\u{3131}', '\u{318e}', L), ('\u{3190}', '\u{31ba}', L),
('\u{31c0}', '\u{31e3}', ON), ('\u{31f0}', '\u{321c}', L), ('\u{321d}', '\u{321e}', ON),
('\u{3220}', '\u{324f}', L), ('\u{3250}', '\u{325f}', ON), ('\u{3260}', '\u{327b}', L),
('\u{327c}', '\u{327e}', ON), ('\u{327f}', '\u{32b0}', L), ('\u{32b1}', '\u{32bf}', ON),
('\u{32c0}', '\u{32cb}', L), ('\u{32cc}', '\u{32cf}', ON), ('\u{32d0}', '\u{32fe}', L),
('\u{3300}', '\u{3376}', L), ('\u{3377}', '\u{337a}', ON), ('\u{337b}', '\u{33dd}', L),
('\u{33de}', '\u{33df}', ON), ('\u{33e0}', '\u{33fe}', L), ('\u{33ff}', '\u{33ff}', ON),
('\u{3400}', '\u{4db5}', L), ('\u{4dc0}', '\u{4dff}', ON), ('\u{4e00}', '\u{9fd5}', L),
('\u{a000}', '\u{a48c}', L), ('\u{a490}', '\u{a4c6}', ON), ('\u{a4d0}', '\u{a60c}', L),
('\u{a60d}', '\u{a60f}', ON), ('\u{a610}', '\u{a62b}', L), ('\u{a640}', '\u{a66e}', L),
('\u{a66f}', '\u{a672}', NSM), ('\u{a673}', '\u{a673}', ON), ('\u{a674}', '\u{a67d}', NSM),
('\u{a67e}', '\u{a67f}', ON), ('\u{a680}', '\u{a69d}', L), ('\u{a69e}', '\u{a69f}', NSM),
('\u{a6a0}', '\u{a6ef}', L), ('\u{a6f0}', '\u{a6f1}', NSM), ('\u{a6f2}', '\u{a6f7}', L),
('\u{a700}', '\u{a721}', ON), ('\u{a722}', '\u{a787}', L), ('\u{a788}', '\u{a788}', ON),
('\u{a789}', '\u{a7ad}', L), ('\u{a7b0}', '\u{a7b7}', L), ('\u{a7f7}', '\u{a801}', L),
('\u{a802}', '\u{a802}', NSM), ('\u{a803}', '\u{a805}', L), ('\u{a806}', '\u{a806}', NSM),
('\u{a807}', '\u{a80a}', L), ('\u{a80b}', '\u{a80b}', NSM), ('\u{a80c}', '\u{a824}', L),
('\u{a825}', '\u{a826}', NSM), ('\u{a827}', '\u{a827}', L), ('\u{a828}', '\u{a82b}', ON),
('\u{a830}', '\u{a837}', L), ('\u{a838}', '\u{a839}', ET), ('\u{a840}', '\u{a873}', L),
('\u{a874}', '\u{a877}', ON), ('\u{a880}', '\u{a8c3}', L), ('\u{a8c4}', '\u{a8c4}', NSM),
('\u{a8ce}', '\u{a8d9}', L), ('\u{a8e0}', '\u{a8f1}', NSM), ('\u{a8f2}', '\u{a8fd}', L),
('\u{a900}', '\u{a925}', L), ('\u{a926}', '\u{a92d}', NSM), ('\u{a92e}', '\u{a946}', L),
('\u{a947}', '\u{a951}', NSM), ('\u{a952}', '\u{a953}', L), ('\u{a95f}', '\u{a97c}', L),
('\u{a980}', '\u{a982}', NSM), ('\u{a983}', '\u{a9b2}', L), ('\u{a9b3}', '\u{a9b3}', NSM),
('\u{a9b4}', '\u{a9b5}', L), ('\u{a9b6}', '\u{a9b9}', NSM), ('\u{a9ba}', '\u{a9bb}', L),
('\u{a9bc}', '\u{a9bc}', NSM), ('\u{a9bd}', '\u{a9cd}', L), ('\u{a9cf}', '\u{a9d9}', L),
('\u{a9de}', '\u{a9e4}', L), ('\u{a9e5}', '\u{a9e5}', NSM), ('\u{a9e6}', '\u{a9fe}', L),
('\u{aa00}', '\u{aa28}', L), ('\u{aa29}', '\u{aa2e}', NSM), ('\u{aa2f}', '\u{aa30}', L),
('\u{aa31}', '\u{aa32}', NSM), ('\u{aa33}', '\u{aa34}', L), ('\u{aa35}', '\u{aa36}', NSM),
('\u{aa40}', '\u{aa42}', L), ('\u{aa43}', '\u{aa43}', NSM), ('\u{aa44}', '\u{aa4b}', L),
('\u{aa4c}', '\u{aa4c}', NSM), ('\u{aa4d}', '\u{aa4d}', L), ('\u{aa50}', '\u{aa59}', L),
('\u{aa5c}', '\u{aa7b}', L), ('\u{aa7c}', '\u{aa7c}', NSM), ('\u{aa7d}', '\u{aaaf}', L),
('\u{aab0}', '\u{aab0}', NSM), ('\u{aab1}', '\u{aab1}', L), ('\u{aab2}', '\u{aab4}', NSM),
('\u{aab5}', '\u{aab6}', L), ('\u{aab7}', '\u{aab8}', NSM), ('\u{aab9}', '\u{aabd}', L),
('\u{aabe}', '\u{aabf}', NSM), ('\u{aac0}', '\u{aac0}', L), ('\u{aac1}', '\u{aac1}', NSM),
('\u{aac2}', '\u{aac2}', L), ('\u{aadb}', '\u{aaeb}', L), ('\u{aaec}', '\u{aaed}', NSM),
('\u{aaee}', '\u{aaf5}', L), ('\u{aaf6}', '\u{aaf6}', NSM), ('\u{ab01}', '\u{ab06}', L),
('\u{ab09}', '\u{ab0e}', L), ('\u{ab11}', '\u{ab16}', L), ('\u{ab20}', '\u{ab26}', L),
('\u{ab28}', '\u{ab2e}', L), ('\u{ab30}', '\u{ab65}', L), ('\u{ab70}', '\u{abe4}', L),
('\u{abe5}', '\u{abe5}', NSM), ('\u{abe6}', '\u{abe7}', L), ('\u{abe8}', '\u{abe8}', NSM),
('\u{abe9}', '\u{abec}', L), ('\u{abed}', '\u{abed}', NSM), ('\u{abf0}', '\u{abf9}', L),
('\u{ac00}', '\u{d7a3}', L), ('\u{d7b0}', '\u{d7c6}', L), ('\u{d7cb}', '\u{d7fb}', L),
('\u{e000}', '\u{fa6d}', L), ('\u{fa70}', '\u{fad9}', L), ('\u{fb00}', '\u{fb06}', L),
('\u{fb13}', '\u{fb17}', L), ('\u{fb1d}', '\u{fb1d}', R), ('\u{fb1e}', '\u{fb1e}', NSM),
('\u{fb1f}', '\u{fb28}', R), ('\u{fb29}', '\u{fb29}', ES), ('\u{fb2a}', '\u{fb4f}', R),
('\u{fb50}', '\u{fd3d}', AL), ('\u{fd3e}', '\u{fd3f}', ON), ('\u{fd40}', '\u{fdcf}', AL),
('\u{fdf0}', '\u{fdfc}', AL), ('\u{fdfd}', '\u{fdfd}', ON), ('\u{fdfe}', '\u{fdff}', AL),
('\u{fe00}', '\u{fe0f}', NSM), ('\u{fe10}', '\u{fe19}', ON), ('\u{fe20}', '\u{fe2f}', NSM),
('\u{fe30}', '\u{fe4f}', ON), ('\u{fe50}', '\u{fe50}', CS), ('\u{fe51}', '\u{fe51}', ON),
('\u{fe52}', '\u{fe52}', CS), ('\u{fe54}', '\u{fe54}', ON), ('\u{fe55}', '\u{fe55}', CS),
('\u{fe56}', '\u{fe5e}', ON), ('\u{fe5f}', '\u{fe5f}', ET), ('\u{fe60}', '\u{fe61}', ON),
('\u{fe62}', '\u{fe63}', ES), ('\u{fe64}', '\u{fe66}', ON), ('\u{fe68}', '\u{fe68}', ON),
('\u{fe69}', '\u{fe6a}', ET), ('\u{fe6b}', '\u{fe6b}', ON), ('\u{fe70}', '\u{fefe}', AL),
('\u{feff}', '\u{feff}', BN), ('\u{ff01}', '\u{ff02}', ON), ('\u{ff03}', '\u{ff05}', ET),
('\u{ff06}', '\u{ff0a}', ON), ('\u{ff0b}', '\u{ff0b}', ES), ('\u{ff0c}', '\u{ff0c}', CS),
('\u{ff0d}', '\u{ff0d}', ES), ('\u{ff0e}', '\u{ff0f}', CS), ('\u{ff10}', '\u{ff19}', EN),
('\u{ff1a}', '\u{ff1a}', CS), ('\u{ff1b}', '\u{ff20}', ON), ('\u{ff21}', '\u{ff3a}', L),
('\u{ff3b}', '\u{ff40}', ON), ('\u{ff41}', '\u{ff5a}', L), ('\u{ff5b}', '\u{ff65}', ON),
('\u{ff66}', '\u{ffbe}', L), ('\u{ffc2}', '\u{ffc7}', L), ('\u{ffca}', '\u{ffcf}', L),
('\u{ffd2}', '\u{ffd7}', L), ('\u{ffda}', '\u{ffdc}', L), ('\u{ffe0}', '\u{ffe1}', ET),
('\u{ffe2}', '\u{ffe4}', ON), ('\u{ffe5}', '\u{ffe6}', ET), ('\u{ffe8}', '\u{ffee}', ON),
('\u{fff9}', '\u{fffd}', ON), ('\u{10000}', '\u{1000b}', L), ('\u{1000d}', '\u{10026}', L),
('\u{10028}', '\u{1003a}', L), ('\u{1003c}', '\u{1003d}', L), ('\u{1003f}', '\u{1004d}', L),
('\u{10050}', '\u{1005d}', L), ('\u{10080}', '\u{100fa}', L), ('\u{10100}', '\u{10100}', L),
('\u{10101}', '\u{10101}', ON), ('\u{10102}', '\u{10102}', L), ('\u{10107}', '\u{10133}',
L), ('\u{10137}', '\u{1013f}', L), ('\u{10140}', '\u{1018c}', ON), ('\u{10190}',
'\u{1019b}', ON), ('\u{101a0}', '\u{101a0}', ON), ('\u{101d0}', '\u{101fc}', L),
('\u{101fd}', '\u{101fd}', NSM), ('\u{10280}', '\u{1029c}', L), ('\u{102a0}', '\u{102d0}',
L), ('\u{102e0}', '\u{102e0}', NSM), ('\u{102e1}', '\u{102fb}', EN), ('\u{10300}',
'\u{10323}', L), ('\u{10330}', '\u{1034a}', L), ('\u{10350}', '\u{10375}', L), ('\u{10376}',
'\u{1037a}', NSM), ('\u{10380}', '\u{1039d}', L), ('\u{1039f}', '\u{103c3}', L),
('\u{103c8}', '\u{103d5}', L), ('\u{10400}', '\u{1049d}', L), ('\u{104a0}', '\u{104a9}', L),
('\u{10500}', '\u{10527}', L), ('\u{10530}', '\u{10563}', L), ('\u{1056f}', '\u{1056f}', L),
('\u{10600}', '\u{10736}', L), ('\u{10740}', '\u{10755}', L), ('\u{10760}', '\u{10767}', L),
('\u{10800}', '\u{1091e}', R), ('\u{1091f}', '\u{1091f}', ON), ('\u{10920}', '\u{10a00}',
R), ('\u{10a01}', '\u{10a03}', NSM), ('\u{10a04}', '\u{10a04}', R), ('\u{10a05}',
'\u{10a06}', NSM), ('\u{10a07}', '\u{10a0b}', R), ('\u{10a0c}', '\u{10a0f}', NSM),
('\u{10a10}', '\u{10a37}', R), ('\u{10a38}', '\u{10a3a}', NSM), ('\u{10a3b}', '\u{10a3e}',
R), ('\u{10a3f}', '\u{10a3f}', NSM), ('\u{10a40}', '\u{10ae4}', R), ('\u{10ae5}',
'\u{10ae6}', NSM), ('\u{10ae7}', '\u{10b38}', R), ('\u{10b39}', '\u{10b3f}', ON),
('\u{10b40}', '\u{10e5f}', R), ('\u{10e60}', '\u{10e7e}', AN), ('\u{10e7f}', '\u{10fff}',
R), ('\u{11000}', '\u{11000}', L), ('\u{11001}', '\u{11001}', NSM), ('\u{11002}',
'\u{11037}', L), ('\u{11038}', '\u{11046}', NSM), ('\u{11047}', '\u{1104d}', L),
('\u{11052}', '\u{11065}', ON), ('\u{11066}', '\u{1106f}', L), ('\u{1107f}', '\u{11081}',
NSM), ('\u{11082}', '\u{110b2}', L), ('\u{110b3}', '\u{110b6}', NSM), ('\u{110b7}',
'\u{110b8}', L), ('\u{110b9}', '\u{110ba}', NSM), ('\u{110bb}', '\u{110c1}', L),
('\u{110d0}', '\u{110e8}', L), ('\u{110f0}', '\u{110f9}', L), ('\u{11100}', '\u{11102}',
NSM), ('\u{11103}', '\u{11126}', L), ('\u{11127}', '\u{1112b}', NSM), ('\u{1112c}',
'\u{1112c}', L), ('\u{1112d}', '\u{11134}', NSM), ('\u{11136}', '\u{11143}', L),
('\u{11150}', '\u{11172}', L), ('\u{11173}', '\u{11173}', NSM), ('\u{11174}', '\u{11176}',
L), ('\u{11180}', '\u{11181}', NSM), ('\u{11182}', '\u{111b5}', L), ('\u{111b6}',
'\u{111be}', NSM), ('\u{111bf}', '\u{111c9}', L), ('\u{111ca}', '\u{111cc}', NSM),
('\u{111cd}', '\u{111cd}', L), ('\u{111d0}', '\u{111df}', L), ('\u{111e1}', '\u{111f4}', L),
('\u{11200}', '\u{11211}', L), ('\u{11213}', '\u{1122e}', L), ('\u{1122f}', '\u{11231}',
NSM), ('\u{11232}', '\u{11233}', L), ('\u{11234}', '\u{11234}', NSM), ('\u{11235}',
'\u{11235}', L), ('\u{11236}', '\u{11237}', NSM), ('\u{11238}', '\u{1123d}', L),
('\u{11280}', '\u{11286}', L), ('\u{11288}', '\u{11288}', L), ('\u{1128a}', '\u{1128d}', L),
('\u{1128f}', '\u{1129d}', L), ('\u{1129f}', '\u{112a9}', L), ('\u{112b0}', '\u{112de}', L),
('\u{112df}', '\u{112df}', NSM), ('\u{112e0}', '\u{112e2}', L), ('\u{112e3}', '\u{112ea}',
NSM), ('\u{112f0}', '\u{112f9}', L), ('\u{11300}', '\u{11301}', NSM), ('\u{11302}',
'\u{11303}', L), ('\u{11305}', '\u{1130c}', L), ('\u{1130f}', '\u{11310}', L), ('\u{11313}',
'\u{11328}', L), ('\u{1132a}', '\u{11330}', L), ('\u{11332}', '\u{11333}', L), ('\u{11335}',
'\u{11339}', L), ('\u{1133c}', '\u{1133c}', NSM), ('\u{1133d}', '\u{1133f}', L),
('\u{11340}', '\u{11340}', NSM), ('\u{11341}', '\u{11344}', L), ('\u{11347}', '\u{11348}',
L), ('\u{1134b}', '\u{1134d}', L), ('\u{11350}', '\u{11350}', L), ('\u{11357}', '\u{11357}',
L), ('\u{1135d}', '\u{11363}', L), ('\u{11366}', '\u{1136c}', NSM), ('\u{11370}',
'\u{11374}', NSM), ('\u{11480}', '\u{114b2}', L), ('\u{114b3}', '\u{114b8}', NSM),
('\u{114b9}', '\u{114b9}', L), ('\u{114ba}', '\u{114ba}', NSM), ('\u{114bb}', '\u{114be}',
L), ('\u{114bf}', '\u{114c0}', NSM), ('\u{114c1}', '\u{114c1}', L), ('\u{114c2}',
'\u{114c3}', NSM), ('\u{114c4}', '\u{114c7}', L), ('\u{114d0}', '\u{114d9}', L),
('\u{11580}', '\u{115b1}', L), ('\u{115b2}', '\u{115b5}', NSM), ('\u{115b8}', '\u{115bb}',
L), ('\u{115bc}', '\u{115bd}', NSM), ('\u{115be}', '\u{115be}', L), ('\u{115bf}',
'\u{115c0}', NSM), ('\u{115c1}', '\u{115db}', L), ('\u{115dc}', '\u{115dd}', NSM),
('\u{11600}', '\u{11632}', L), ('\u{11633}', '\u{1163a}', NSM), ('\u{1163b}', '\u{1163c}',
L), ('\u{1163d}', '\u{1163d}', NSM), ('\u{1163e}', '\u{1163e}', L), ('\u{1163f}',
'\u{11640}', NSM), ('\u{11641}', '\u{11644}', L), ('\u{11650}', '\u{11659}', L),
('\u{11680}', '\u{116aa}', L), ('\u{116ab}', '\u{116ab}', NSM), ('\u{116ac}', '\u{116ac}',
L), ('\u{116ad}', '\u{116ad}', NSM), ('\u{116ae}', '\u{116af}', L), ('\u{116b0}',
'\u{116b5}', NSM), ('\u{116b6}', '\u{116b6}', L), ('\u{116b7}', '\u{116b7}', NSM),
('\u{116c0}', '\u{116c9}', L), ('\u{11700}', '\u{11719}', L), ('\u{1171d}', '\u{1171f}',
NSM), ('\u{11720}', '\u{11721}', L), ('\u{11722}', '\u{11725}', NSM), ('\u{11726}',
'\u{11726}', L), ('\u{11727}', '\u{1172b}', NSM), ('\u{11730}', '\u{1173f}', L),
('\u{118a0}', '\u{118f2}', L), ('\u{118ff}', '\u{118ff}', L), ('\u{11ac0}', '\u{11af8}', L),
('\u{12000}', '\u{12399}', L), ('\u{12400}', '\u{1246e}', L), ('\u{12470}', '\u{12474}', L),
('\u{12480}', '\u{12543}', L), ('\u{13000}', '\u{1342e}', L), ('\u{14400}', '\u{14646}', L),
('\u{16800}', '\u{16a38}', L), ('\u{16a40}', '\u{16a5e}', L), ('\u{16a60}', '\u{16a69}', L),
('\u{16a6e}', '\u{16a6f}', L), ('\u{16ad0}', '\u{16aed}', L), ('\u{16af0}', '\u{16af4}',
NSM), ('\u{16af5}', '\u{16af5}', L), ('\u{16b00}', '\u{16b2f}', L), ('\u{16b30}',
'\u{16b36}', NSM), ('\u{16b37}', '\u{16b45}', L), ('\u{16b50}', '\u{16b59}', L),
('\u{16b5b}', '\u{16b61}', L), ('\u{16b63}', '\u{16b77}', L), ('\u{16b7d}', '\u{16b8f}', L),
('\u{16f00}', '\u{16f44}', L), ('\u{16f50}', '\u{16f7e}', L), ('\u{16f8f}', '\u{16f92}',
NSM), ('\u{16f93}', '\u{16f9f}', L), ('\u{1b000}', '\u{1b001}', L), ('\u{1bc00}',
'\u{1bc6a}', L), ('\u{1bc70}', '\u{1bc7c}', L), ('\u{1bc80}', '\u{1bc88}', L), ('\u{1bc90}',
'\u{1bc99}', L), ('\u{1bc9c}', '\u{1bc9c}', L), ('\u{1bc9d}', '\u{1bc9e}', NSM),
('\u{1bc9f}', '\u{1bc9f}', L), ('\u{1bca0}', '\u{1bca3}', BN), ('\u{1d000}', '\u{1d0f5}',
L), ('\u{1d100}', '\u{1d126}', L), ('\u{1d129}', '\u{1d166}', L), ('\u{1d167}', '\u{1d169}',
NSM), ('\u{1d16a}', '\u{1d172}', L), ('\u{1d173}', '\u{1d17a}', BN), ('\u{1d17b}',
'\u{1d182}', NSM), ('\u{1d183}', '\u{1d184}', L), ('\u{1d185}', '\u{1d18b}', NSM),
('\u{1d18c}', '\u{1d1a9}', L), ('\u{1d1aa}', '\u{1d1ad}', NSM), ('\u{1d1ae}', '\u{1d1e8}',
L), ('\u{1d200}', '\u{1d241}', ON), ('\u{1d242}', '\u{1d244}', NSM), ('\u{1d245}',
'\u{1d245}', ON), ('\u{1d300}', '\u{1d356}', ON), ('\u{1d360}', '\u{1d371}', L),
('\u{1d400}', '\u{1d454}', L), ('\u{1d456}', '\u{1d49c}', L), ('\u{1d49e}', '\u{1d49f}', L),
('\u{1d4a2}', '\u{1d4a2}', L), ('\u{1d4a5}', '\u{1d4a6}', L), ('\u{1d4a9}', '\u{1d4ac}', L),
('\u{1d4ae}', '\u{1d4b9}', L), ('\u{1d4bb}', '\u{1d4bb}', L), ('\u{1d4bd}', '\u{1d4c3}', L),
('\u{1d4c5}', '\u{1d505}', L), ('\u{1d507}', '\u{1d50a}', L), ('\u{1d50d}', '\u{1d514}', L),
('\u{1d516}', '\u{1d51c}', L), ('\u{1d51e}', '\u{1d539}', L), ('\u{1d53b}', '\u{1d53e}', L),
('\u{1d540}', '\u{1d544}', L), ('\u{1d546}', '\u{1d546}', L), ('\u{1d54a}', '\u{1d550}', L),
('\u{1d552}', '\u{1d6a5}', L), ('\u{1d6a8}', '\u{1d6da}', L), ('\u{1d6db}', '\u{1d6db}',
ON), ('\u{1d6dc}', '\u{1d714}', L), ('\u{1d715}', '\u{1d715}', ON), ('\u{1d716}',
'\u{1d74e}', L), ('\u{1d74f}', '\u{1d74f}', ON), ('\u{1d750}', '\u{1d788}', L),
('\u{1d789}', '\u{1d789}', ON), ('\u{1d78a}', '\u{1d7c2}', L), ('\u{1d7c3}', '\u{1d7c3}',
ON), ('\u{1d7c4}', '\u{1d7cb}', L), ('\u{1d7ce}', '\u{1d7ff}', EN), ('\u{1d800}',
'\u{1d9ff}', L), ('\u{1da00}', '\u{1da36}', NSM), ('\u{1da37}', '\u{1da3a}', L),
('\u{1da3b}', '\u{1da6c}', NSM), ('\u{1da6d}', '\u{1da74}', L), ('\u{1da75}', '\u{1da75}',
NSM), ('\u{1da76}', '\u{1da83}', L), ('\u{1da84}', '\u{1da84}', NSM), ('\u{1da85}',
'\u{1da8b}', L), ('\u{1da9b}', '\u{1da9f}', NSM), ('\u{1daa1}', '\u{1daaf}', NSM),
('\u{1e800}', '\u{1e8cf}', R), ('\u{1e8d0}', '\u{1e8d6}', NSM), ('\u{1e8d7}', '\u{1edff}',
R), ('\u{1ee00}', '\u{1eeef}', AL), ('\u{1eef0}', '\u{1eef1}', ON), ('\u{1eef2}',
'\u{1eeff}', AL), ('\u{1ef00}', '\u{1efff}', R), ('\u{1f000}', '\u{1f02b}', ON),
('\u{1f030}', '\u{1f093}', ON), ('\u{1f0a0}', '\u{1f0ae}', ON), ('\u{1f0b1}', '\u{1f0bf}',
ON), ('\u{1f0c1}', '\u{1f0cf}', ON), ('\u{1f0d1}', '\u{1f0f5}', ON), ('\u{1f100}',
'\u{1f10a}', EN), ('\u{1f10b}', '\u{1f10c}', ON), ('\u{1f110}', '\u{1f12e}', L),
('\u{1f130}', '\u{1f169}', L), ('\u{1f16a}', '\u{1f16b}', ON), ('\u{1f170}', '\u{1f19a}',
L), ('\u{1f1e6}', '\u{1f202}', L), ('\u{1f210}', '\u{1f23a}', L), ('\u{1f240}', '\u{1f248}',
L), ('\u{1f250}', '\u{1f251}', L), ('\u{1f300}', '\u{1f579}', ON), ('\u{1f57b}',
'\u{1f5a3}', ON), ('\u{1f5a5}', '\u{1f6d0}', ON), ('\u{1f6e0}', '\u{1f6ec}', ON),
('\u{1f6f0}', '\u{1f6f3}', ON), ('\u{1f700}', '\u{1f773}', ON), ('\u{1f780}', '\u{1f7d4}',
ON), ('\u{1f800}', '\u{1f80b}', ON), ('\u{1f810}', '\u{1f847}', ON), ('\u{1f850}',
'\u{1f859}', ON), ('\u{1f860}', '\u{1f887}', ON), ('\u{1f890}', '\u{1f8ad}', ON),
('\u{1f910}', '\u{1f918}', ON), ('\u{1f980}', '\u{1f984}', ON), ('\u{1f9c0}', '\u{1f9c0}',
ON), ('\u{20000}', '\u{2a6d6}', L), ('\u{2a700}', '\u{2b734}', L), ('\u{2b740}',
'\u{2b81d}', L), ('\u{2b820}', '\u{2cea1}', L), ('\u{2f800}', '\u{2fa1d}', L), ('\u{e0001}',
'\u{e0001}', BN), ('\u{e0020}', '\u{e007f}', BN), ('\u{e0100}', '\u{e01ef}', NSM),
('\u{f0000}', '\u{ffffd}', L), ('\u{100000}', '\u{10fffd}', L)
];

View File

@ -1,220 +0,0 @@
#!/usr/bin/env python
#
# Based on src/etc/unicode.py from Rust 1.2.0.
#
# Copyright 2011-2013 The Rust Project Developers.
# Copyright 2015 The Servo Project Developers. See the COPYRIGHT
# file at the top-level directory of this distribution and at
# http://rust-lang.org/COPYRIGHT.
#
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
# option. This file may not be copied, modified, or distributed
# except according to those terms.
import fileinput, re, os, sys, operator
# Header text written verbatim at the very top of the generated tables.rs file.
preamble = '''// NOTE:
// The following code was generated by "tools/generate.py". do not edit directly
#![allow(missing_docs, non_upper_case_globals, non_snake_case)]
'''
# Inclusive (first, last) bounds of the surrogate code point range. Surrogates
# are not valid Rust characters, so they are skipped when loading the data.
surrogate_codepoints = (0xd800, 0xdfff)
def fetch(f):
    """Make sure the Unicode data file `f` exists in the current directory.

    `f` is a path relative to http://www.unicode.org/Public/UNIDATA/. When no
    file named like its basename exists in the working directory, it is
    downloaded with `curl -O`. If the file is still missing afterwards, an
    error is written to stderr and the process exits with status 1.
    """
    local_name = os.path.basename(f)
    if not os.path.exists(local_name):
        os.system("curl -O http://www.unicode.org/Public/UNIDATA/%s"
                  % f)

    if not os.path.exists(local_name):
        sys.stderr.write("cannot load %s\n" % f)
        # sys.exit() instead of the bare exit() helper: the latter is only
        # injected by the `site` module and is meant for interactive use.
        sys.exit(1)
def is_surrogate(n):
    """Tell whether code point `n` lies inside `surrogate_codepoints` (bounds inclusive)."""
    first, last = surrogate_codepoints
    return first <= n and n <= last
def load_unicode_data(f):
    """Load UnicodeData.txt and group its code points by Bidi_Class.

    `f` is the data file name; it is fetched first if it is not present.
    Every record with exactly 15 ';'-separated fields is read, surrogate code
    points are skipped, and "<..., First>" records are expanded together with
    the record that follows them into one entry per code point. Code points
    that have no record but fall inside one of the default ranges below get
    that range's default class.

    Returns the result of `group_cats` applied to a dict that maps each
    Bidi_Class abbreviation to the list of code points carrying it.
    """
    fetch(f)
    udict = {}

    # Start of a pending "<..., First>" range, or -1 when no range is open.
    range_start = -1
    # fetch() checks for (and curl stores) the file under its basename in the
    # current directory, so read it from there.
    for line in fileinput.input(os.path.basename(f)):
        data = line.split(';')
        if len(data) != 15:
            continue
        cp = int(data[0], 16)
        if is_surrogate(cp):
            continue
        if range_start >= 0:
            # The record following a "<..., First>" line closes the range; its
            # fields are applied to every code point in between. range() is
            # used rather than the Python 2-only xrange() so the script also
            # runs under Python 3.
            for i in range(range_start, cp):
                udict[i] = data
            range_start = -1
        if data[1].endswith(", First>"):
            range_start = cp
            continue
        udict[cp] = data

    # Mapping of code point to Bidi_Class property:
    bidi_class = {}
    for code in udict:
        # Field 4 of a UnicodeData.txt record is the Bidi_Class abbreviation.
        bidi = udict[code][4]
        bidi_class.setdefault(bidi, []).append(code)

    # Default Bidi_Class for unassigned codepoints.
    # http://www.unicode.org/Public/UNIDATA/extracted/DerivedBidiClass.txt
    default_ranges = [
        (0x0600, 0x07BF, "AL"), (0x08A0, 0x08FF, "AL"),
        (0xFB50, 0xFDCF, "AL"), (0xFDF0, 0xFDFF, "AL"),
        (0xFE70, 0xFEFF, "AL"), (0x1EE00, 0x0001EEFF, "AL"),
        (0x0590, 0x05FF, "R"), (0x07C0, 0x089F, "R"),
        (0xFB1D, 0xFB4F, "R"), (0x00010800, 0x00010FFF, "R"),
        (0x0001E800, 0x0001EDFF, "R"), (0x0001EF00, 0x0001EFFF, "R"),
        (0x20A0, 0x20CF, "ET")]

    for (start, end, default) in default_ranges:
        for code in range(start, end+1):
            if code not in udict:
                # setdefault: the class may not occur on any assigned code point.
                bidi_class.setdefault(default, []).append(code)

    bidi_class = group_cats(bidi_class)
    return bidi_class
def group_cats(cats):
    """Flatten a {category: [code points]} dict into sorted range triples.

    Returns a pair: the sorted category names, and a list of
    (first, last, category) tuples ordered by `first`, where the ranges of
    each category come from `group_cat`.
    """
    triples = [
        (first, last, name)
        for name in cats
        for (first, last) in group_cat(cats[name])
    ]
    ordered = sorted(triples, key=lambda triple: triple[0])
    return (sorted(cats.keys()), ordered)
def group_cat(cat):
    """Collapse a collection of code points into sorted, inclusive ranges.

    Duplicates are ignored. Returns a list of (first, last) tuples in
    ascending order, where each tuple covers a maximal run of consecutive
    values. An empty input yields an empty list.
    """
    letters = sorted(set(cat))
    if not letters:
        # No first element to seed the scan with: nothing to group.
        return []
    cat_out = []
    cur_start = cur_end = letters[0]
    for letter in letters[1:]:
        assert letter > cur_end, \
            "cur_end: %s, letter: %s" % (hex(cur_end), hex(letter))
        if letter == cur_end + 1:
            # Still consecutive: extend the current run.
            cur_end = letter
        else:
            cat_out.append((cur_start, cur_end))
            cur_start = cur_end = letter
    cat_out.append((cur_start, cur_end))
    return cat_out
def format_table_content(f, content, indent):
    """Write the comma-separated items of `content` to `f` as wrapped lines.

    `content` is split on ","; the pieces are re-joined with ", " and every
    line starts with `indent` spaces. A piece is appended to the current line
    only while len(line) + len(piece) stays below 98; otherwise the line is
    written followed by ",\\n" and the piece starts a new line. The final line
    is written without a trailing newline.
    """
    pad = " " * indent
    line = pad
    # False until the first piece has been placed. The first piece always goes
    # on the first line, so an over-long first piece cannot produce a line that
    # holds nothing but a comma.
    has_items = False
    for chunk in content.split(","):
        if not has_items:
            line += chunk
            has_items = True
        elif len(line) + len(chunk) < 98:
            line += ", " + chunk
        else:
            f.write(line + ",\n")
            line = pad + chunk
    f.write(line)
def escape_char(c):
    """Render code point `c` as a single-quoted Rust char literal with a lowercase-hex escape."""
    hex_digits = "%x" % c
    return "'\\u{" + hex_digits + "}'"
def emit_table(f, name, t_data, t_type = "&'static [(char, char)]", is_pub=True,
        pfun=lambda x: "(%s,%s)" % (escape_char(x[0]), escape_char(x[1]))):
    """Write a Rust `const` slice named `name` to `f`.

    Each element of `t_data` is rendered with `pfun`; the rendered entries are
    joined with "," and laid out by `format_table_content` with an indent of 8.
    `t_type` is the declared Rust type and `is_pub` controls the `pub ` prefix.
    """
    visibility = "pub " if is_pub else ""
    f.write(" %sconst %s: %s = &[\n" % (visibility, name, t_type))
    entries = ",".join([pfun(entry) for entry in t_data])
    format_table_content(f, entries, 8)
    f.write("\n ];\n\n")
def emit_bidi_module(f, bidi_class, cats):
    """Write the Rust source for the BidiClass enum and its lookup code to `f`.

    Output order: the enum header, one variant line per entry of `cats`, the
    binary-search helper together with the public `bidi_class` function, and
    finally the private `bidi_class_table` built from `bidi_class` through
    `emit_table`.
    """
    enum_prologue = """pub use self::BidiClass::*;
#[allow(non_camel_case_types)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
/// Represents the Unicode character property **Bidi_Class**, also known as
/// the *bidirectional character type*.
///
/// Use the `bidi_class` function to look up the BidiClass of a code point.
///
/// http://www.unicode.org/reports/tr9/#Bidirectional_Character_Types
pub enum BidiClass {
"""
    lookup_source = """ }
fn bsearch_range_value_table(c: char, r: &'static [(char, char, BidiClass)]) -> BidiClass {
use ::std::cmp::Ordering::{Equal, Less, Greater};
match r.binary_search_by(|&(lo, hi, _)| {
if lo <= c && c <= hi { Equal }
else if hi < c { Less }
else { Greater }
}) {
Ok(idx) => {
let (_, _, cat) = r[idx];
cat
}
// UCD/extracted/DerivedBidiClass.txt: "All code points not explicitly listed
// for Bidi_Class have the value Left_To_Right (L)."
Err(_) => L
}
}
/// Find the BidiClass of a single char.
pub fn bidi_class(c: char) -> BidiClass {
bsearch_range_value_table(c, bidi_class_table)
}
"""

    def render_entry(entry):
        # (first char, last char, class name) -> one Rust tuple literal.
        return "(%s,%s,%s)" % (escape_char(entry[0]), escape_char(entry[1]), entry[2])

    f.write(enum_prologue)
    f.write("".join([" " + variant + ",\n" for variant in cats]))
    f.write(lookup_source)
    emit_table(f, "bidi_class_table", bidi_class, "&'static [(char, char, BidiClass)]",
               pfun=render_entry, is_pub=False)
# Script entry point: regenerate tables.rs from the Unicode data files.
if __name__ == "__main__":
    # The relative path assumes the script is started from its own directory.
    os.chdir("../src/") # changing download path to /unicode-bidi/src/
    r = "tables.rs"
    # downloading the test case files
    fetch("BidiTest.txt")
    fetch("BidiCharacterTest.txt")

    if os.path.exists(r):
        os.remove(r)
    with open(r, "w") as rf:
        # write the file's preamble
        rf.write(preamble)

        # download and parse all the data
        fetch("ReadMe.txt")
        with open("ReadMe.txt") as readme:
            # Raw string: "\d" and "\." are regex escapes, not string escapes.
            pattern = r"for Version (\d+)\.(\d+)\.(\d+) of the Unicode"
            version_match = re.search(pattern, readme.read())
        if version_match is None:
            # Fail with a readable message instead of an AttributeError.
            sys.stderr.write("cannot find the Unicode version in ReadMe.txt\n")
            sys.exit(1)
        unicode_version = version_match.groups()
        rf.write("""
/// The version of [Unicode](http://www.unicode.org/)
/// that the `bidi_class` function is based on.
pub const UNICODE_VERSION: (u64, u64, u64) = (%s, %s, %s);
""" % unicode_version)
        (bidi_cats, bidi_class) = load_unicode_data("UnicodeData.txt")
        emit_bidi_module(rf, bidi_class, bidi_cats)

View File

@ -1 +0,0 @@
{"files":{".cargo-ok":"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",".gitignore":"2282653731b0b47b955075a5a366b16c4ea092dbe7de4f9bb68a0e36abbb62f3",".travis.yml":"493704d6a0d0f27ad2ad6e950f8bce5f42d9ec4081daeb3c5a48066f1030f467","COPYRIGHT":"23860c2a7b5d96b21569afedf033469bab9fe14a1b24a35068b8641c578ce24d","Cargo.toml":"098ff7b940aedf372f29c54788e1a58173a54846c88fec5c627b3d74d013298c","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"7b63ecd5f1902af1b63729947373683c32745c16a10e8e6292e2e2dcd7e90ae0","README.md":"8a937b4305a42bfa5a2faf7baaaae8156d055537934a53eed2d3bc9a3f9da40a","scripts/unicode.py":"64a1b919ab0e251fdb1db8b9c5363d84227fca33ac8375268bb88b74c4462f8f","scripts/unicode_gen_normtests.py":"da891d433fa58068747a1cd121774435b7d486394ce5c85c8079b227d20ea507","src/DerivedNormalizationProps.txt":"6cc76100d402d7dd12bd769ac61158b91e71aad58c8333abb848f1b9c9fdde2d","src/ReadMe.txt":"e4dc6045cb89ddaca938ff7ffffc2c2aafc5d4fc85023fffb9b36d4aee47986b","src/UnicodeData.txt":"38b17e1118206489a7e0ab5d29d7932212d38838df7d3ec025ecb58e8798ec20","src/decompose.rs":"04818a6b0271412ec58508e44535b18c58c80384a5836fe5b2c24e489a5ab4cc","src/lib.rs":"d3cab5f68a71b640da82859d06d348a0c3050d611c17cc9cb66989492437a98c","src/normalize.rs":"6bcc92e3f2ee9922a017a7b7adc8dfd88de4bcf777974e07da3688559d0ec008","src/recompose.rs":"96b8aea91e09f1fa439467378f2d1fa2aa4c81c86e597d3d36a2a35d7750a0d6","src/tables.rs":"bc930c04ed78307e624a225ec27679cf3fad00ba13cb2f283963476cd449e84e","src/test.rs":"e47de49aeef9231f5ff177eeb9946618577fc67ed798e889b99710c877e92e1b","src/testdata.rs":"6a67ae0a072c7d65fca0d101f4f5831592ff24ab137f8e1c4f9524f2bdc8d63e"},"package":"26643a2f83bac55f1976fb716c10234485f9202dcd65cfbdf9da49867b271172"}

View File

@ -1,4 +0,0 @@
target
Cargo.lock
scripts/tmp
*.pyc

View File

@ -1,20 +0,0 @@
language: rust
sudo: false
script:
- cargo build --verbose
- cargo test --verbose
- rustdoc --test README.md -L target/debug -L target/debug/deps
- cargo doc
after_success: |
[ $TRAVIS_BRANCH = master ] &&
[ $TRAVIS_PULL_REQUEST = false ] &&
echo '<meta http-equiv=refresh content=0;url=unicode_normalization/index.html>' > target/doc/index.html &&
pip install ghp-import --user $USER &&
$HOME/.local/bin/ghp-import -n target/doc &&
git push -qf https://${TOKEN}@github.com/${TRAVIS_REPO_SLUG}.git gh-pages
env:
global:
secure: Te91dtDN8uv5OBxVuMy+nvQ5GtnLU9r6amS9p6IbblVXyzXgXPQdFfAND+GXXfZNnsjAyS2LnZL4NcNOR4JF63H0KxTTDIGXLSUAmc0C98UhqWWvv5bjz4mY0YKs9MwuTBX9P0LdTZjvRTd+yJ2PYH7ORGen+ZuOmlPNE7lpzrg=
notifications:
email:
on_success: never

View File

@ -1,7 +0,0 @@
Licensed under the Apache License, Version 2.0
<LICENSE-APACHE or
http://www.apache.org/licenses/LICENSE-2.0> or the MIT
license <LICENSE-MIT or http://opensource.org/licenses/MIT>,
at your option. All files in the project carrying such
notice may not be copied, modified, or distributed except
according to those terms.

View File

@ -1,21 +0,0 @@
[package]
name = "unicode-normalization"
version = "0.1.2"
authors = ["kwantam <kwantam@gmail.com>"]
homepage = "https://github.com/unicode-rs/unicode-normalization"
repository = "https://github.com/unicode-rs/unicode-normalization"
documentation = "https://unicode-rs.github.io/unicode-normalization"
license = "MIT/Apache-2.0"
keywords = ["text", "unicode", "normalization", "decomposition", "recomposition"]
readme = "README.md"
description = """
This crate provides functions for normalization of
Unicode strings, including Canonical and Compatible
Decomposition and Recomposition, as described in
Unicode Standard Annex #15.
"""
exclude = [ "target/*", "Cargo.lock", "scripts/tmp" ]

View File

@ -1,201 +0,0 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@ -1,25 +0,0 @@
Copyright (c) 2015 The Rust Project Developers
Permission is hereby granted, free of charge, to any
person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the
Software without restriction, including without
limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software
is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice
shall be included in all copies or substantial portions
of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

View File

@ -1,32 +0,0 @@
Unicode character composition and decomposition utilities
as described in
[Unicode Standard Annex #15](http://www.unicode.org/reports/tr15/).
[![Build Status](https://travis-ci.org/unicode-rs/unicode-normalization.svg)](https://travis-ci.org/unicode-rs/unicode-normalization)
[Documentation](https://unicode-rs.github.io/unicode-normalization/unicode_normalization/index.html)
```rust
extern crate unicode_normalization;
use unicode_normalization::char::compose;
use unicode_normalization::UnicodeNormalization;
fn main() {
assert_eq!(compose('A','\u{30a}'), Some('Å'));
let s = "ÅΩ";
let c = s.nfc().collect::<String>();
assert_eq!(c, "ÅΩ");
}
```
# crates.io
You can use this package in your project by adding the following
to your `Cargo.toml`:
```toml
[dependencies]
unicode-normalization = "0.1.1"
```

View File

@ -1,372 +0,0 @@
#!/usr/bin/env python
#
# Copyright 2011-2015 The Rust Project Developers. See the COPYRIGHT
# file at the top-level directory of this distribution and at
# http://rust-lang.org/COPYRIGHT.
#
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
# option. This file may not be copied, modified, or distributed
# except according to those terms.
# This script uses the following Unicode tables:
# - DerivedNormalizationProps.txt
# - ReadMe.txt
# - UnicodeData.txt
#
# Since this should not require frequent updates, we just store this
# out-of-line and check the generated tables.rs file into git.
import fileinput, re, os, sys
# Rust source header (license notice, "generated" note and lint attributes);
# presumably written at the top of the generated file -- the code that emits
# it is not in this part of the script.
preamble = '''// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// NOTE: The following code was generated by "scripts/unicode.py", do not edit directly
#![allow(missing_docs, non_upper_case_globals, non_snake_case)]
'''
# Mapping taken from Table 12 from:
# http://www.unicode.org/reports/tr44/#General_Category_Values
# Each two-letter General_Category value maps to the grouped categories that
# contain it (e.g. every Nd/Nl/No code point also belongs to N).
expanded_categories = {
    'Lu': ['LC', 'L'], 'Ll': ['LC', 'L'], 'Lt': ['LC', 'L'],
    'Lm': ['L'], 'Lo': ['L'],
    'Mn': ['M'], 'Mc': ['M'], 'Me': ['M'],
    'Nd': ['N'], 'Nl': ['N'], 'No': ['N'],
    'Pc': ['P'], 'Pd': ['P'], 'Ps': ['P'], 'Pe': ['P'],
    'Pi': ['P'], 'Pf': ['P'], 'Po': ['P'],
    'Sm': ['S'], 'Sc': ['S'], 'Sk': ['S'], 'So': ['S'],
    'Zs': ['Z'], 'Zl': ['Z'], 'Zp': ['Z'],
    'Cc': ['C'], 'Cf': ['C'], 'Cs': ['C'], 'Co': ['C'], 'Cn': ['C'],
}
# Inclusive (first, last) bounds of the surrogate code point range; surrogates
# are not valid Rust characters.
surrogate_codepoints = (0xd800, 0xdfff)
def fetch(f):
    """Download the Unicode data file `f` unless a local copy already exists.

    `f` is a path below http://www.unicode.org/Public/UNIDATA/. The local copy
    is looked up by basename in the current working directory and fetched with
    `curl -O` when absent. If it is still absent afterwards, an error is
    written to stderr and the process exits with status 1.
    """
    target = os.path.basename(f)
    if not os.path.exists(target):
        os.system("curl -O http://www.unicode.org/Public/UNIDATA/%s"
                  % f)

    if not os.path.exists(target):
        sys.stderr.write("cannot load %s\n" % f)
        # The bare exit() helper is provided by the `site` module for
        # interactive use; sys.exit() is the reliable way to stop a script.
        sys.exit(1)
def is_surrogate(n):
    """Return whether `n` falls within the inclusive `surrogate_codepoints` bounds."""
    lowest, highest = surrogate_codepoints
    return lowest <= n and n <= highest
def load_unicode_data(f):
    """Load UnicodeData.txt and extract the data needed for normalization.

    `f` is the data file name; it is fetched first if it is not present.
    Records with exactly 15 ';'-separated fields are read, surrogates are
    skipped, and "<..., First>" records are expanded together with the record
    that follows them into one entry per code point.

    Returns a tuple (canon_decomp, compat_decomp, combines,
    general_category_mark):
      * canon_decomp / compat_decomp: dicts mapping a code point to its
        decomposition as a list of code points; mappings that start with a
        "<tag>" go to compat_decomp, all others to canon_decomp.
      * combines: result of to_combines(group_cats(...)) over the code points
        grouped by combining class, for every class other than "0".
      * general_category_mark: result of group_cat over the code points whose
        general category is, or expands to, 'M'.
    """
    fetch(f)
    combines = {}
    canon_decomp = {}
    compat_decomp = {}
    general_category_mark = []

    udict = {}
    # Start of a pending "<..., First>" range, or -1 when no range is open.
    range_start = -1
    # fetch() checks for (and curl stores) the file under its basename in the
    # current directory, so read it from there.
    for line in fileinput.input(os.path.basename(f)):
        data = line.split(';')
        if len(data) != 15:
            continue
        cp = int(data[0], 16)
        if is_surrogate(cp):
            continue
        if range_start >= 0:
            # The record following a "<..., First>" line closes the range.
            # range() rather than the Python 2-only xrange() keeps the script
            # runnable under Python 3 as well.
            for i in range(range_start, cp):
                udict[i] = data
            range_start = -1
        if data[1].endswith(", First>"):
            range_start = cp
            continue
        udict[cp] = data

    for code in udict:
        record = udict[code]
        # Fields used: 2 = general category, 3 = combining class,
        # 5 = decomposition mapping.
        gencat, combine, decomp = record[2], record[3], record[5]

        # store decomposition, if given
        if decomp != "":
            fields = decomp.split()
            if decomp.startswith('<'):
                # The leading "<tag>" is dropped; only the code points are kept.
                compat_decomp[code] = [int(i, 16) for i in fields[1:]]
            else:
                canon_decomp[code] = [int(i, 16) for i in fields]

        # record combining class, if any
        if combine != "0":
            combines.setdefault(combine, []).append(code)

        if 'M' in [gencat] + expanded_categories.get(gencat, []):
            general_category_mark.append(code)
    general_category_mark = group_cat(general_category_mark)

    combines = to_combines(group_cats(combines))

    return (canon_decomp, compat_decomp, combines, general_category_mark)
def group_cats(cats):
    """Apply group_cat() to every value of `cats`, returning a new dict with the same keys."""
    return dict((name, group_cat(members)) for (name, members) in cats.items())
def group_cat(cat):
    """Collapse a collection of code points into sorted inclusive ranges.

    Duplicates are ignored. Returns a list of `(start, end)` tuples in which
    each tuple covers a maximal run of consecutive values. An empty input
    yields an empty list.
    """
    letters = sorted(set(cat))
    if not letters:
        # nothing to group; previously this crashed on letters.pop(0)
        return []
    cat_out = []
    cur_start = cur_end = letters[0]
    for letter in letters[1:]:
        assert letter > cur_end, \
            "cur_end: %s, letter: %s" % (hex(cur_end), hex(letter))
        if letter == cur_end + 1:
            # still contiguous: extend the current run
            cur_end = letter
        else:
            cat_out.append((cur_start, cur_end))
            cur_start = cur_end = letter
    cat_out.append((cur_start, cur_end))
    return cat_out
def ungroup_cat(cat):
    """Expand inclusive `(lo, hi)` ranges back into a flat list of values, in input order."""
    expanded = []
    for (lo, hi) in cat:
        expanded.extend(range(lo, hi + 1))
    return expanded
def to_combines(combs):
    """Flatten `{class: [(lo, hi), ...]}` into `(lo, hi, class)` tuples sorted by `lo`."""
    flat = [(lo, hi, cls) for cls in combs for (lo, hi) in combs[cls]]
    return sorted(flat, key=lambda entry: entry[0])
def format_table_content(f, content, indent):
    """Write the comma-separated items of `content` to `f`, wrapped into lines.

    Every output line starts with `indent` spaces. Items are joined with ", "
    and a new line is started whenever appending the next item would bring the
    current line to 98 characters or more. The final line is written without a
    trailing newline.
    """
    pad = " " * indent
    current = pad
    at_start = True
    for piece in content.split(","):
        if len(current) + len(piece) >= 98:
            # line is full: flush it and start the next one with this item
            f.write(current + ",\n")
            current = pad + piece
            continue
        current += piece if at_start else ", " + piece
        at_start = False
    f.write(current)
def load_properties(f, interestingprops):
    """Parse a Unicode property file into `{property: [(lo, hi), ...]}`.

    Recognised lines have the form `XXXX ; Prop` or `XXXX..YYYY ; Prop`; all
    other lines are skipped. When `interestingprops` is non-empty, only the
    properties it contains are kept. The ranges of each property are merged
    into maximal inclusive runs before returning.
    """
    fetch(f)
    single_re = re.compile("^ *([0-9A-F]+) *; *(\w+)")
    range_re = re.compile("^ *([0-9A-F]+)\.\.([0-9A-F]+) *; *(\w+)")

    props = {}
    for line in fileinput.input(os.path.basename(f)):
        hit = single_re.match(line)
        if hit:
            # a single code point is a range of length one
            lo_hex = hi_hex = hit.group(1)
            prop = hit.group(2)
        else:
            hit = range_re.match(line)
            if not hit:
                continue
            lo_hex, hi_hex, prop = hit.groups()
        if interestingprops and prop not in interestingprops:
            continue
        props.setdefault(prop, []).append((int(lo_hex, 16), int(hi_hex, 16)))

    # optimize if possible
    for prop in props:
        props[prop] = group_cat(ungroup_cat(props[prop]))

    return props
def escape_char(c):
    """Format the integer code point `c` as a Rust char literal, e.g. 0x41 -> '\\u{41}'."""
    return "'\\u{{{0:x}}}'".format(c)
def emit_table(f, name, t_data, t_type = "&'static [(char, char)]", is_pub=True,
        pfun=lambda x: "(%s,%s)" % (escape_char(x[0]), escape_char(x[1]))):
    """Write a Rust `const` slice named `name` of type `t_type` to `f`.

    Each element of `t_data` is rendered with `pfun`; the rendered entries are
    laid out by format_table_content(). The constant is declared `pub` when
    `is_pub` is true.
    """
    visibility = "pub " if is_pub else ""
    f.write(" %sconst %s: %s = &[\n" % (visibility, name, t_type))
    rendered = ",".join(pfun(entry) for entry in t_data)
    format_table_content(f, rendered, 8)
    f.write("\n ];\n\n")
def emit_norm_module(f, canon, compat, combine, norm_props, general_category_mark):
    """Write the Rust `normalization` module to `f`.

    The module contains the canonical and compatibility decomposition tables,
    the canonical composition table, the combining-class table with its lookup
    function, and the General_Category=Mark table with `is_combining_mark`.

    * `canon` / `compat`: dicts of code point -> list of decomposed code points.
    * `combine`: sequence of `(lo, hi, combining_class)` tuples.
    * `norm_props`: property dict; only "Full_Composition_Exclusion" is read.
    * `general_category_mark`: sequence of inclusive `(lo, hi)` ranges.
    """
    canon_keys = sorted(canon.keys())
    compat_keys = sorted(compat.keys())

    # Build the composition table: first char -> [(second char, composed char)],
    # from every two-element canonical decomposition that is not excluded.
    canon_comp = {}
    comp_exclusions = norm_props["Full_Composition_Exclusion"]
    for char in canon_keys:
        if any(lo <= char <= hi for (lo, hi) in comp_exclusions):
            continue
        decomp = canon[char]
        if len(decomp) == 2:
            if decomp[0] not in canon_comp:
                canon_comp[decomp[0]] = []
            canon_comp[decomp[0]].append( (decomp[1], char) )
    canon_comp_keys = sorted(canon_comp.keys())

    f.write("pub mod normalization {\n")

    def mkdata_fun(table):
        # Returns a formatter rendering `(char, &[decomposition...])` for a key of `table`.
        def fmt_entry(char):
            data = "(%s,&[" % escape_char(char)
            first = True
            for d in table[char]:
                if not first:
                    data += ","
                first = False
                data += escape_char(d)
            data += "])"
            return data
        return fmt_entry

    f.write(" // Canonical decompositions\n")
    emit_table(f, "canonical_table", canon_keys, "&'static [(char, &'static [char])]",
        pfun=mkdata_fun(canon))

    f.write(" // Compatibility decompositions\n")
    emit_table(f, "compatibility_table", compat_keys, "&'static [(char, &'static [char])]",
        pfun=mkdata_fun(compat))

    def comp_pfun(char):
        data = "(%s,&[" % escape_char(char)
        # keep the candidates ordered by second character; the generated Rust
        # code binary-searches them
        canon_comp[char].sort(key=lambda pair: pair[0])
        first = True
        for pair in canon_comp[char]:
            if not first:
                data += ","
            first = False
            data += "(%s,%s)" % (escape_char(pair[0]), escape_char(pair[1]))
        data += "])"
        return data

    f.write(" // Canonical compositions\n")
    emit_table(f, "composition_table", canon_comp_keys,
        "&'static [(char, &'static [(char, char)])]", pfun=comp_pfun)

    f.write("""
fn bsearch_range_value_table(c: char, r: &'static [(char, char, u8)]) -> u8 {
use std::cmp::Ordering::{Equal, Less, Greater};
match r.binary_search_by(|&(lo, hi, _)| {
if lo <= c && c <= hi { Equal }
else if hi < c { Less }
else { Greater }
}) {
Ok(idx) => {
let (_, _, result) = r[idx];
result
}
Err(_) => 0
}
}\n
""")

    emit_table(f, "combining_class_table", combine, "&'static [(char, char, u8)]", is_pub=False,
            pfun=lambda x: "(%s,%s,%s)" % (escape_char(x[0]), escape_char(x[1]), x[2]))

    f.write(" pub fn canonical_combining_class(c: char) -> u8 {\n"
        + " bsearch_range_value_table(c, combining_class_table)\n"
        + " }\n")

    f.write("""
fn bsearch_range_table(c: char, r: &'static [(char, char)]) -> bool {
use std::cmp::Ordering::{Equal, Less, Greater};
r.binary_search_by(|&(lo, hi)| {
if lo <= c && c <= hi {
Equal
} else if hi < c {
Less
} else {
Greater
}
})
.is_ok()
}
/// Return whether the given character is a combining mark (`General_Category=Mark`)
pub fn is_combining_mark(c: char) -> bool {
bsearch_range_table(c, general_category_mark)
}
""")

    # The mark table must be built from the General_Category=Mark ranges, not
    # from the combining-class ranges in `combine`.
    emit_table(f, "general_category_mark", general_category_mark, "&'static [(char, char)]",
            is_pub=False,
            pfun=lambda x: "(%s,%s)" % (escape_char(x[0]), escape_char(x[1])))

    f.write("""
}
""")
if __name__ == "__main__":
    out_path = "tables.rs"
    # always regenerate from scratch
    if os.path.exists(out_path):
        os.remove(out_path)

    with open(out_path, "w") as rf:
        # write the file's preamble
        rf.write(preamble)

        # download and parse all the data
        fetch("ReadMe.txt")
        with open("ReadMe.txt") as readme:
            # the ReadMe states which Unicode version the data files belong to
            version_re = "for Version (\d+)\.(\d+)\.(\d+) of the Unicode"
            unicode_version = re.search(version_re, readme.read()).groups()
        rf.write("""
/// The version of [Unicode](http://www.unicode.org/)
/// that this version of unicode-normalization is based on.
pub const UNICODE_VERSION: (u64, u64, u64) = (%s, %s, %s);
""" % unicode_version)

        unicode_tables = load_unicode_data("UnicodeData.txt")
        (canon_decomp, compat_decomp, combines, general_category_mark) = unicode_tables
        norm_props = load_properties("DerivedNormalizationProps.txt",
                                     ["Full_Composition_Exclusion"])

        # normalizations and conversions module
        emit_norm_module(rf, canon_decomp, compat_decomp, combines, norm_props,
                         general_category_mark)

View File

@ -1,81 +0,0 @@
#!/usr/bin/env python
#
# Copyright 2015 The Rust Project Developers. See the COPYRIGHT
# file at the top-level directory of this distribution and at
# http://rust-lang.org/COPYRIGHT.
#
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
# option. This file may not be copied, modified, or distributed
# except according to those terms.
# This script uses the following Unicode tables:
# - NormalizationTest.txt
#
# Since this should not require frequent updates, we just store this
# out-of-line and check the generated testdata.rs file into git.
import unicode, re, os, fileinput
def load_test_data(f):
    """Parse a NormalizationTest.txt-style file into a list of test cases.

    Each returned test case is a list of five lists of integer code points,
    one per `;`-separated column. Comment (`#`) and header (`@`) lines are
    skipped, lines that do not match the expected layout are reported and
    skipped, and test cases containing a surrogate code point are dropped.
    """
    line_re = re.compile("^(.*?);(.*?);(.*?);(.*?);(.*?);\s+#.*$")

    def parse_columns(match):
        # Returns the five columns as lists of ints, or None as soon as a
        # surrogate code point is encountered.
        columns = []
        for idx in range(1, 6):
            column = []
            for token in match.group(idx).split():
                cp = int(token, 16)
                if unicode.is_surrogate(cp):
                    return None
                column.append(cp)
            columns.append(column)
        return columns

    unicode.fetch(f)
    tests = []
    for line in fileinput.input(os.path.basename(f)):
        # comment and header lines start with # and @ respectively
        if len(line) < 1 or line[0:1] in ('#', '@'):
            continue

        m = line_re.match(line)
        if not m:
            print("error: no match on line where test was expected: %s" % line)
            continue

        columns = parse_columns(m)
        if columns is None:
            continue
        tests.append(columns)

    return tests
def showfun(gs):
    """Render a test case as a Rust tuple of string literals.

    Every group of code points in `gs` becomes one double-quoted string made
    of `\\u{..}` escapes; the strings are joined with commas inside parentheses.
    """
    quoted = ['"' + "".join("\\u{%x}" % ch for ch in g) + '"' for g in gs]
    return "(" + ",".join(quoted) + ")"
if __name__ == "__main__":
    d = load_test_data("NormalizationTest.txt")
    # every test case is a 5-tuple of string literals
    ntype = "&'static [(%s)]" % ", ".join(["&'static str"] * 5)
    with open("testdata.rs", "w") as nf:
        nf.write(unicode.preamble)
        nf.write("\n")
        nf.write(" // official Unicode test data\n")
        nf.write(" // http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt\n")
        unicode.emit_table(nf, "TEST_NORM", d, t_type=ntype, is_pub=True, pfun=showfun)

File diff suppressed because it is too large Load Diff

View File

@ -1,17 +0,0 @@
# Date: 2015-06-16, 20:24:00 GMT [KW]
#
# Unicode Character Database
# Copyright (c) 1991-2015 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# For documentation, see the following:
# NamesList.html
# UAX #38, "Unicode Han Database (Unihan)"
# UAX #44, "Unicode Character Database."
#
# The UAXes can be accessed at http://www.unicode.org/versions/Unicode8.0.0/
This directory contains the final data files
for the Unicode Character Database, for Version 8.0.0 of the Unicode
Standard.

File diff suppressed because it is too large Load Diff

View File

@ -1,135 +0,0 @@
// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// Helper functions used for Unicode normalization
/// Reorders `comb` in place so that, within every run of characters whose
/// combining class is non-zero, the classes are in ascending order.
///
/// This is a bubble sort: only adjacent elements are exchanged and elements
/// with equal classes are never exchanged, so their relative order is kept.
/// Characters with class 0 never move.
fn canonical_sort(comb: &mut [(char, u8)]) {
    let mut unsorted = comb.len();
    while unsorted > 0 {
        let mut any_swap = false;
        for right in 1..unsorted {
            let left = right - 1;
            let (lhs, rhs) = (comb[left].1, comb[right].1);
            // `lhs > rhs` with `rhs != 0` already implies `lhs != 0`.
            if rhs != 0 && lhs > rhs {
                comb.swap(left, right);
                any_swap = true;
            }
        }
        if !any_swap {
            break;
        }
        // the largest remaining element has bubbled to the end of the prefix
        unsorted -= 1;
    }
}
// Selects which per-character decomposition function `Decompositions::next` calls.
#[derive(Clone)]
enum DecompositionType {
// Use `decompose_canonical`; set by `new_canonical`.
Canonical,
// Use `decompose_compatible`; set by `new_compatible`.
Compatible
}
/// External iterator for a string decomposition's characters.
#[derive(Clone)]
pub struct Decompositions<I> {
// Which decomposition (canonical or compatible) is applied to each input char.
kind: DecompositionType,
// The source of characters still to be decomposed.
iter: I,
// Decomposed characters not yet yielded, each paired with its canonical combining class.
buffer: Vec<(char, u8)>,
// Whether `buffer` has been passed through `canonical_sort` since it last changed state.
sorted: bool
}
#[inline]
pub fn new_canonical<I: Iterator<Item=char>>(iter: I) -> Decompositions<I> {
Decompositions {
iter: iter,
buffer: Vec::new(),
sorted: false,
kind: self::DecompositionType::Canonical,
}
}
#[inline]
pub fn new_compatible<I: Iterator<Item=char>>(iter: I) -> Decompositions<I> {
Decompositions {
iter: iter,
buffer: Vec::new(),
sorted: false,
kind: self::DecompositionType::Compatible,
}
}
// Yields the decomposed characters of the wrapped iterator. Characters are
// staged in `buffer` together with their combining class and passed through
// `canonical_sort` before they are handed out.
impl<I: Iterator<Item=char>> Iterator for Decompositions<I> {
type Item = char;
#[inline]
fn next(&mut self) -> Option<char> {
use self::DecompositionType::*;
// Fast path: hand out the front of the buffer when that is already possible.
match self.buffer.first() {
// A class-0 character at the front is yielded immediately; whatever
// follows it has to be sorted again before it can be yielded.
Some(&(c, 0)) => {
self.sorted = false;
self.buffer.remove(0);
return Some(c);
}
// A non-zero-class character is only yielded once the buffer is sorted.
Some(&(c, _)) if self.sorted => {
self.buffer.remove(0);
return Some(c);
}
// Empty buffer, or unsorted non-zero-class front: more input is needed.
_ => self.sorted = false
}
if !self.sorted {
// Decompose input characters into the buffer until a class-0 character
// has been decomposed (which sorts the buffer) or the input runs out.
for ch in self.iter.by_ref() {
let buffer = &mut self.buffer;
let sorted = &mut self.sorted;
{
// Invoked once for every character produced by decomposing `ch`.
let callback = |d| {
let class =
super::char::canonical_combining_class(d);
// The first class-0 character seen triggers a sort of everything
// buffered before it; it is then pushed after the sorted part.
if class == 0 && !*sorted {
canonical_sort(buffer);
*sorted = true;
}
buffer.push((d, class));
};
match self.kind {
Canonical => {
super::char::decompose_canonical(ch, callback)
}
Compatible => {
super::char::decompose_compatible(ch, callback)
}
}
}
if *sorted {
break
}
}
}
// Input exhausted without a sort having happened: sort what is left.
if !self.sorted {
canonical_sort(&mut self.buffer);
self.sorted = true;
}
if self.buffer.is_empty() {
None
} else {
match self.buffer.remove(0) {
// Yielding a class-0 character invalidates the sorted flag, as above.
(c, 0) => {
self.sorted = false;
Some(c)
}
(c, _) => Some(c),
}
}
}
// Only the inner iterator's lower bound is forwarded; no upper bound is reported.
fn size_hint(&self) -> (usize, Option<usize>) {
let (lower, _) = self.iter.size_hint();
(lower, None)
}
}

View File

@ -1,138 +0,0 @@
// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//! Unicode character composition and decomposition utilities
//! as described in
//! [Unicode Standard Annex #15](http://www.unicode.org/reports/tr15/).
//!
//! ```rust
//! extern crate unicode_normalization;
//!
//! use unicode_normalization::char::compose;
//! use unicode_normalization::UnicodeNormalization;
//!
//! fn main() {
//! assert_eq!(compose('A','\u{30a}'), Some('Å'));
//!
//! let s = "ÅΩ";
//! let c = s.nfc().collect::<String>();
//! assert_eq!(c, "ÅΩ");
//! }
//! ```
//!
//! # crates.io
//!
//! You can use this package in your project by adding the following
//! to your `Cargo.toml`:
//!
//! ```toml
//! [dependencies]
//! unicode-normalization = "0.1.0"
//! ```
#![deny(missing_docs, unsafe_code)]
#![doc(html_logo_url = "https://unicode-rs.github.io/unicode-rs_sm.png",
html_favicon_url = "https://unicode-rs.github.io/unicode-rs_sm.png")]
pub use tables::UNICODE_VERSION;
pub use decompose::Decompositions;
pub use recompose::Recompositions;
use std::str::Chars;
mod decompose;
mod normalize;
mod recompose;
mod tables;
#[cfg(test)]
mod test;
#[cfg(test)]
mod testdata;
/// Methods for composing and decomposing characters.
pub mod char {
// Per-character functions re-exported from the private `normalize` module.
pub use normalize::{decompose_canonical, decompose_compatible, compose};
/// Look up the canonical combining class of a character.
pub use tables::normalization::canonical_combining_class;
/// Return whether the given character is a combining mark (`General_Category=Mark`)
pub use tables::normalization::is_combining_mark;
}
/// Methods for iterating over strings while applying Unicode normalizations
/// as described in
/// [Unicode Standard Annex #15](http://www.unicode.org/reports/tr15/).
pub trait UnicodeNormalization<I: Iterator<Item=char>> {
/// Returns an iterator over the string in Unicode Normalization Form D
/// (canonical decomposition).
#[inline]
fn nfd(self) -> Decompositions<I>;
/// Returns an iterator over the string in Unicode Normalization Form KD
/// (compatibility decomposition).
#[inline]
fn nfkd(self) -> Decompositions<I>;
/// An Iterator over the string in Unicode Normalization Form C
/// (canonical decomposition followed by canonical composition).
#[inline]
fn nfc(self) -> Recompositions<I>;
/// An Iterator over the string in Unicode Normalization Form KC
/// (compatibility decomposition followed by canonical composition).
#[inline]
fn nfkc(self) -> Recompositions<I>;
}
// String slices are normalized through their `chars()` iterator; each method
// forwards to the blanket implementation for `Iterator<Item=char>`, which
// calls the same constructor on `self.chars()`.
impl<'a> UnicodeNormalization<Chars<'a>> for &'a str {
    #[inline]
    fn nfd(self) -> Decompositions<Chars<'a>> {
        self.chars().nfd()
    }

    #[inline]
    fn nfkd(self) -> Decompositions<Chars<'a>> {
        self.chars().nfkd()
    }

    #[inline]
    fn nfc(self) -> Recompositions<Chars<'a>> {
        self.chars().nfc()
    }

    #[inline]
    fn nfkc(self) -> Recompositions<Chars<'a>> {
        self.chars().nfkc()
    }
}
// Any iterator over `char` can be normalized directly: each method hands
// `self` to the matching constructor in `decompose` / `recompose`.
impl<I> UnicodeNormalization<I> for I
    where I: Iterator<Item=char>
{
    #[inline]
    fn nfd(self) -> Decompositions<I> {
        decompose::new_canonical(self)
    }

    #[inline]
    fn nfkd(self) -> Decompositions<I> {
        decompose::new_compatible(self)
    }

    #[inline]
    fn nfc(self) -> Recompositions<I> {
        recompose::new_canonical(self)
    }

    #[inline]
    fn nfkc(self) -> Recompositions<I> {
        recompose::new_compatible(self)
    }
}

View File

@ -1,158 +0,0 @@
// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//! Functions for computing canonical and compatible decompositions for Unicode characters.
use std::cmp::Ordering::{Equal, Less, Greater};
use std::ops::FnMut;
use tables::normalization::{canonical_table, compatibility_table, composition_table};
// Binary-searches `r` for the entry whose first element equals `c` and
// returns the slice stored with it, or `None` when there is no such entry.
// `r` must be sorted by its first element for the search to be meaningful.
fn bsearch_table<T>(c: char, r: &'static [(char, &'static [T])]) -> Option<&'static [T]> {
    r.binary_search_by(|entry| {
        if entry.0 < c {
            Less
        } else if entry.0 > c {
            Greater
        } else {
            Equal
        }
    })
    .ok()
    .map(|idx| r[idx].1)
}
/// Compute canonical Unicode decomposition for character.
/// See [Unicode Standard Annex #15](http://www.unicode.org/reports/tr15/)
/// for more information.
///
/// The callback `i` is invoked once for every character of the decomposition.
pub fn decompose_canonical<F>(c: char, mut i: F)
    where F: FnMut(char)
{
    let compat = false;
    d(c, &mut i, compat);
}
/// Compute canonical or compatible Unicode decomposition for character.
/// See [Unicode Standard Annex #15](http://www.unicode.org/reports/tr15/)
/// for more information.
///
/// The callback `i` is invoked once for every character of the decomposition.
pub fn decompose_compatible<F>(c: char, mut i: F)
    where F: FnMut(char)
{
    let compat = true;
    d(c, &mut i, compat);
}
// FIXME(#19596) This is a workaround, we should use `F` instead of `&mut F`
// Recursively decomposes `c`, calling `i` for every resulting character.
// Compatibility mappings are only consulted when `k` is true.
fn d<F>(c: char, i: &mut F, k: bool) where F: FnMut(char) {
    // 7-bit ASCII never decomposes
    if c <= '\x7f' {
        (*i)(c);
        return;
    }

    // Perform decomposition for Hangul
    let code = c as u32;
    if S_BASE <= code && code < S_BASE + S_COUNT {
        decompose_hangul(c, i);
        return;
    }

    // First check the canonical decompositions
    if let Some(canon) = bsearch_table(c, canonical_table) {
        for &x in canon {
            d(x, i, k);
        }
        return;
    }

    // Then, only when doing compat, check the compatibility decompositions
    if k {
        if let Some(compat) = bsearch_table(c, compatibility_table) {
            for &x in compat {
                d(x, i, k);
            }
            return;
        }
    }

    // Finally bottom out.
    (*i)(c);
}
/// Compose two characters into a single character, if possible.
/// See [Unicode Standard Annex #15](http://www.unicode.org/reports/tr15/)
/// for more information.
///
/// Hangul composition is tried first; otherwise `a` is looked up in the
/// composition table and `b` is searched among its candidates.
pub fn compose(a: char, b: char) -> Option<char> {
    compose_hangul(a, b).or_else(|| {
        bsearch_table(a, composition_table).and_then(|candidates| {
            candidates
                .binary_search_by(|&(second, _)| second.cmp(&b))
                .ok()
                .map(|idx| candidates[idx].1)
        })
    })
}
// Constants from Unicode 7.0.0 Section 3.12 Conjoining Jamo Behavior

// First code point of each block used by the Hangul arithmetic below.
const S_BASE: u32 = 0xAC00;
const L_BASE: u32 = 0x1100;
const V_BASE: u32 = 0x1161;
const T_BASE: u32 = 0x11A7;

// Number of distinct values for each jamo position.
const L_COUNT: u32 = 19;
const V_COUNT: u32 = 21;
const T_COUNT: u32 = 28;

// Derived sizes: syllables per leading consonant, and syllables in total.
const N_COUNT: u32 = V_COUNT * T_COUNT;
const S_COUNT: u32 = L_COUNT * N_COUNT;
// FIXME(#19596) This is a workaround, we should use `F` instead of `&mut F`
// Decompose a precomposed Hangul syllable
//
// Calls `f` with the leading consonant, the vowel and, when the syllable has
// one, the trailing consonant of `s`. `s` is expected to lie in
// `S_BASE .. S_BASE + S_COUNT`; the caller `d` checks this before calling.
//
// The jamo are built with the checked `std::char::from_u32` rather than
// `transmute`, so no `unsafe` code is needed here.
#[inline(always)]
fn decompose_hangul<F>(s: char, f: &mut F) where F: FnMut(char) {
    use std::char::from_u32;

    let si = s as u32 - S_BASE;

    let li = si / N_COUNT;
    (*f)(from_u32(L_BASE + li).expect("leading jamo is a valid char"));

    let vi = (si % N_COUNT) / T_COUNT;
    (*f)(from_u32(V_BASE + vi).expect("vowel jamo is a valid char"));

    // index 0 means the syllable has no trailing consonant
    let ti = si % T_COUNT;
    if ti > 0 {
        (*f)(from_u32(T_BASE + ti).expect("trailing jamo is a valid char"));
    }
}
// Compose a pair of Hangul Jamo
//
// Returns the precomposed syllable for
//   * a leading consonant `a` followed by a vowel `b` (giving an LV syllable), or
//   * an LV syllable `a` followed by a trailing consonant `b` (giving LVT),
// and `None` for every other pair.
//
// Two conditions matter in the second case:
//   * `a` must be an LV syllable, i.e. its offset from `S_BASE` is a multiple
//     of `T_COUNT`. A syllable that already has a trailing consonant must not
//     absorb another one.
//   * `b` must be strictly greater than `T_BASE`. `T_BASE` itself stands for
//     "no trailing consonant" (index 0) and is not a trailing jamo, so it must
//     not be swallowed by the composition.
#[inline(always)]
fn compose_hangul(a: char, b: char) -> Option<char> {
    use std::char::from_u32;

    let l = a as u32;
    let v = b as u32;
    // Compose an LPart and a VPart
    if L_BASE <= l && l < (L_BASE + L_COUNT) && V_BASE <= v && v < (V_BASE + V_COUNT) {
        let r = S_BASE + (l - L_BASE) * N_COUNT + (v - V_BASE) * T_COUNT;
        return from_u32(r);
    }
    // Compose an LVPart and a TPart
    if S_BASE <= l && l < (S_BASE + S_COUNT) && (l - S_BASE) % T_COUNT == 0
        && T_BASE < v && v < (T_BASE + T_COUNT)
    {
        let r = l + (v - T_BASE);
        return from_u32(r);
    }
    None
}

View File

@ -1,137 +0,0 @@
// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use std::collections::VecDeque;
use decompose::Decompositions;
// State machine driving `Recompositions::next`.
#[derive(Clone)]
enum RecompositionState {
// Pulling decomposed characters and trying to combine them with `composee`.
Composing,
// Draining `buffer` front to back; switches back to `Composing` once it is empty.
Purging,
// The decomposition iterator is exhausted; only leftovers remain to be yielded.
Finished
}
/// External iterator for a string recomposition's characters.
#[derive(Clone)]
pub struct Recompositions<I> {
// Decomposed input that is being recomposed.
iter: Decompositions<I>,
// Current phase of `next`.
state: RecompositionState,
// Characters that could not be combined with `composee` and wait to be yielded.
buffer: VecDeque<char>,
// The character following characters are currently being composed onto, if any.
composee: Option<char>,
// Combining class of the character most recently pushed to `buffer`;
// `None` while nothing has been buffered for the current `composee`.
last_ccc: Option<u8>
}
#[inline]
pub fn new_canonical<I: Iterator<Item=char>>(iter: I) -> Recompositions<I> {
Recompositions {
iter: super::decompose::new_canonical(iter),
state: self::RecompositionState::Composing,
buffer: VecDeque::new(),
composee: None,
last_ccc: None,
}
}
#[inline]
pub fn new_compatible<I: Iterator<Item=char>>(iter: I) -> Recompositions<I> {
Recompositions {
iter: super::decompose::new_compatible(iter),
state : self::RecompositionState::Composing,
buffer: VecDeque::new(),
composee: None,
last_ccc: None,
}
}
// Yields the recomposed characters. `composee` holds the character that
// following characters are composed onto; characters that cannot be composed
// are parked in `buffer` and yielded after the composee.
impl<I: Iterator<Item=char>> Iterator for Recompositions<I> {
type Item = char;
#[inline]
fn next(&mut self) -> Option<char> {
use self::RecompositionState::*;
loop {
match self.state {
Composing => {
for ch in self.iter.by_ref() {
let ch_class = super::char::canonical_combining_class(ch);
// No composee yet: a class-0 character becomes the composee,
// anything else is passed through unchanged.
if self.composee.is_none() {
if ch_class != 0 {
return Some(ch);
}
self.composee = Some(ch);
continue;
}
let k = self.composee.clone().unwrap();
match self.last_ccc {
// Nothing buffered since the composee was set: `ch` directly follows it.
None => {
match super::char::compose(k, ch) {
Some(r) => {
self.composee = Some(r);
continue;
}
None => {
// A class-0 character that does not compose replaces the
// composee; the old composee is yielded.
if ch_class == 0 {
self.composee = Some(ch);
return Some(k);
}
self.buffer.push_back(ch);
self.last_ccc = Some(ch_class);
}
}
}
// Something is buffered between the composee and `ch`.
Some(l_class) => {
if l_class >= ch_class {
// `ch` is blocked from `composee`
if ch_class == 0 {
// Yield the old composee now and drain the buffer
// (Purging) before composing onto the new one.
self.composee = Some(ch);
self.last_ccc = None;
self.state = Purging;
return Some(k);
}
self.buffer.push_back(ch);
self.last_ccc = Some(ch_class);
continue;
}
// Not blocked: try to compose `ch` onto the composee.
match super::char::compose(k, ch) {
Some(r) => {
self.composee = Some(r);
continue;
}
None => {
self.buffer.push_back(ch);
self.last_ccc = Some(ch_class);
}
}
}
}
}
// The decomposition iterator is exhausted: yield the composee (if any)
// first, then the Finished state drains the buffer.
self.state = Finished;
if self.composee.is_some() {
return self.composee.take();
}
}
Purging => {
match self.buffer.pop_front() {
None => self.state = Composing,
s => return s
}
}
Finished => {
match self.buffer.pop_front() {
// Buffer empty: return whatever is left in `composee` (taking it
// leaves `None` behind for later calls).
None => return self.composee.take(),
s => return s
}
}
}
}
}
}

File diff suppressed because it is too large Load Diff

View File

@ -1,155 +0,0 @@
// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use UnicodeNormalization;
// Spot checks for NFD: each `t!(input, expected)` asserts that canonical
// decomposition of `input` yields `expected`, both via the `&str` impl and via
// a generic `char` iterator.
#[test]
fn test_nfd() {
macro_rules! t {
($input: expr, $expected: expr) => {
assert_eq!($input.nfd().collect::<String>(), $expected);
// A dummy iterator that is not std::str::Chars directly:
assert_eq!($input.chars().map(|c| c).nfd().collect::<String>(), $expected);
}
}
t!("abc", "abc");
t!("\u{1e0b}\u{1c4}", "d\u{307}\u{1c4}");
t!("\u{2026}", "\u{2026}");
t!("\u{2126}", "\u{3a9}");
t!("\u{1e0b}\u{323}", "d\u{323}\u{307}");
t!("\u{1e0d}\u{307}", "d\u{323}\u{307}");
t!("a\u{301}", "a\u{301}");
t!("\u{301}a", "\u{301}a");
// Hangul syllables decompose into their jamo.
t!("\u{d4db}", "\u{1111}\u{1171}\u{11b6}");
t!("\u{ac1c}", "\u{1100}\u{1162}");
}
// Spot checks for NFKD: each `t!(input, expected)` asserts that compatibility
// decomposition of `input` yields `expected`.
#[test]
fn test_nfkd() {
macro_rules! t {
($input: expr, $expected: expr) => {
assert_eq!($input.nfkd().collect::<String>(), $expected);
}
}
t!("abc", "abc");
t!("\u{1e0b}\u{1c4}", "d\u{307}DZ\u{30c}");
t!("\u{2026}", "...");
t!("\u{2126}", "\u{3a9}");
t!("\u{1e0b}\u{323}", "d\u{323}\u{307}");
t!("\u{1e0d}\u{307}", "d\u{323}\u{307}");
t!("a\u{301}", "a\u{301}");
t!("\u{301}a", "\u{301}a");
// Hangul syllables decompose into their jamo.
t!("\u{d4db}", "\u{1111}\u{1171}\u{11b6}");
t!("\u{ac1c}", "\u{1100}\u{1162}");
}
// Spot checks for NFC: each `t!(input, expected)` asserts that canonical
// decomposition followed by composition of `input` yields `expected`.
#[test]
fn test_nfc() {
macro_rules! t {
($input: expr, $expected: expr) => {
assert_eq!($input.nfc().collect::<String>(), $expected);
}
}
t!("abc", "abc");
t!("\u{1e0b}\u{1c4}", "\u{1e0b}\u{1c4}");
t!("\u{2026}", "\u{2026}");
t!("\u{2126}", "\u{3a9}");
t!("\u{1e0b}\u{323}", "\u{1e0d}\u{307}");
t!("\u{1e0d}\u{307}", "\u{1e0d}\u{307}");
t!("a\u{301}", "\u{e1}");
t!("\u{301}a", "\u{301}a");
// Precomposed Hangul syllables survive the round trip unchanged.
t!("\u{d4db}", "\u{d4db}");
t!("\u{ac1c}", "\u{ac1c}");
t!("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b");
}
// Spot checks for NFKC: each `t!(input, expected)` asserts that compatibility
// decomposition followed by composition of `input` yields `expected`.
#[test]
fn test_nfkc() {
macro_rules! t {
($input: expr, $expected: expr) => {
assert_eq!($input.nfkc().collect::<String>(), $expected);
}
}
t!("abc", "abc");
t!("\u{1e0b}\u{1c4}", "\u{1e0b}D\u{17d}");
t!("\u{2026}", "...");
t!("\u{2126}", "\u{3a9}");
t!("\u{1e0b}\u{323}", "\u{1e0d}\u{307}");
t!("\u{1e0d}\u{307}", "\u{1e0d}\u{307}");
t!("a\u{301}", "\u{e1}");
t!("\u{301}a", "\u{301}a");
// Precomposed Hangul syllables survive the round trip unchanged.
t!("\u{d4db}", "\u{d4db}");
t!("\u{ac1c}", "\u{ac1c}");
t!("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b");
}
// Runs the official conformance data: for every test tuple, each of the four
// normalization forms is applied to all five columns and compared with the
// column the CONFORMANCE section designates as the expected result.
#[test]
fn test_official() {
    use testdata::TEST_NORM;

    // Asserts that `got[i]` equals `want[i]` for all five columns, in order.
    fn check(want: [&str; 5], got: Vec<String>) {
        for (w, g) in want.iter().zip(got.iter()) {
            assert_eq!(*w, &g[..]);
        }
    }

    for &(s1, s2, s3, s4, s5) in TEST_NORM {
        // these invariants come from the CONFORMANCE section of
        // http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt
        let columns = [s1, s2, s3, s4, s5];

        // NFC: columns 1-3 give column 2, columns 4-5 give column 4
        check([s2, s2, s2, s4, s4],
              columns.iter().map(|&s| s.nfc().collect::<String>()).collect());

        // NFD: columns 1-3 give column 3, columns 4-5 give column 5
        check([s3, s3, s3, s5, s5],
              columns.iter().map(|&s| s.nfd().collect::<String>()).collect());

        // NFKC: every column gives column 4
        check([s4, s4, s4, s4, s4],
              columns.iter().map(|&s| s.nfkc().collect::<String>()).collect());

        // NFKD: every column gives column 5
        check([s5, s5, s5, s5, s5],
              columns.iter().map(|&s| s.nfkd().collect::<String>()).collect());
    }
}

File diff suppressed because it is too large Load Diff

View File

@ -1 +0,0 @@
{"files":{".cargo-ok":"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",".gitignore":"e20a03cb660e0039506f8828204fd93815ebfe051ef62194f8dcf3fc8b7d0e5a",".travis.yml":"2a3033f9edf86bc829de486fc1e74fe0bbe3166ed99b0139754ea29772c19b06","Cargo.toml":"30055a6d4ff3d1bf29a9fa99401e4ffffeecc4b5bade830843a9a5f1634fefd1","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"20c7855c364d57ea4c97889a5e8d98470a9952dade37bd9248b9a54431670e5e","Makefile":"92b64915724e3b5fec95ec96c5af738bd219b80709e18bf8639d784bc3d9a600","README.md":"eb3f4694003f408cbe3c7f3e9fbbc71241defb940cc55a816981f0f0f144c8eb","UPGRADING.md":"fbcc2d39bdf17db0745793db6626fcd5c909dddd4ce13b27566cfabece22c368","appveyor.yml":"c78486dbfbe6ebbf3d808afb9a19f7ec18c4704ce451c6305f0716999b70a1a6","docs/.nojekyll":"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855","docs/404.html":"f61e6271c1ea1aa113b64b356e994595fa548f0433f89948d747503ad22195cd","docs/index.html":"f61e6271c1ea1aa113b64b356e994595fa548f0433f89948d747503ad22195cd","github.png":"b432fd855efe7c430fe6a57ccf83935c1996f03a7cdc8d6e1b34154b8c43f6ec","rust-url-todo":"1192cee7b6cedf2133d97dc6074b593a1d19b0ee13fff6f28d6329855044e575","src/encoding.rs":"7fb43e1c109bf9f2a80a05525082f90e79dba8e8056547571c49fba074406d39","src/form_urlencoded.rs":"172922f2c51eb8dae0182d70e5e0c2969f1b5b7aac026720ced9f84059465999","src/host.rs":"a654dc3c9ce9024b98698e000bc97abaac7f10c49b1635be8125cf72376fd5d1","src/lib.rs":"c3542aabc733f76a3b9c7d24b5c41e60e9eb84d2712660611300d1de0e7c2072","src/origin.rs":"d52010a280d363aed6832f7de5e52c1a62815302e59dcbc9cdc2574e2ac884b9","src/parser.rs":"8ca331ada66f309e91fcc375d8467c929b453979fe0ac5abe190eb8a32074db8","src/path_segments.rs":"0414985c441d0c0292ccc6f56a144b84728ae03382476e6cae1a766f8c333ef8","src/percent_encoding.rs":"44d3321eaa1c77715e9ea1421519289ca73612a31c3d6fce04ff489dfa7db3c5","src/quirks.rs":"3249d1a1f73dd29ec06d626ea2ea4d61e7b2a782543742a5bee422b2f3864b19"
,"src/slicing.rs":"4e539886b23945a92094625f3e531a4bff40daa44240b5d19ee8577478c4f7fe","tests/data.rs":"d36f0ee509fb00524635a7968e336bb89674a82a4fcb06be189155e4b9d43db5","tests/setters_tests.json":"ebb439306ea748be6d0f93132cb59220c5805afd56357d6017f1eb1e4f0f80b3","tests/unit.rs":"9a05f55c7b381b58ee67ef8ef145220e5df88a403225a1a324a36afc9bb6a7d7","tests/urltestdata.json":"11abe4a459566c3929e3326828f8576cb59ec8fab1d191714c34eac28d5f15f9"},"package":"8527c62d9869a08325c38272b3f85668df22a65890c61a639d233dc0ed0b23a2"}

View File

View File

@ -1,3 +0,0 @@
target
Cargo.lock
/.cargo/config

View File

@ -1,8 +0,0 @@
language: rust
rust:
- nightly
- beta
- stable
script: make test
notifications:
webhooks: http://build.servo.org:54856/travis

View File

@ -1,38 +0,0 @@
[package]
name = "url"
version = "1.2.1"
authors = ["The rust-url developers"]
description = "URL library for Rust, based on the WHATWG URL Standard"
documentation = "http://servo.github.io/rust-url/url/index.html"
repository = "https://github.com/servo/rust-url"
readme = "README.md"
keywords = ["url", "parser"]
license = "MIT/Apache-2.0"
[[test]]
name = "unit"
[[test]]
name = "data"
harness = false
[lib]
test = false
[dev-dependencies]
rustc-test = "0.1"
rustc-serialize = "0.3"
[features]
query_encoding = ["encoding"]
heap_size = ["heapsize"]
[dependencies]
encoding = {version = "0.2", optional = true}
heapsize = {version = ">=0.1.1, <0.4", optional = true}
idna = { version = "0.1.0", path = "./idna" }
matches = "0.1"
rustc-serialize = {version = "0.3", optional = true}
serde = {version = ">=0.6.1, <0.9", optional = true}

View File

@ -1,201 +0,0 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@ -1,25 +0,0 @@
Copyright (c) 2013-2016 The rust-url developers
Permission is hereby granted, free of charge, to any
person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the
Software without restriction, including without
limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software
is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice
shall be included in all copies or substantial portions
of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

View File

@ -1,5 +0,0 @@
test:
cargo test --features "query_encoding serde rustc-serialize"
[ x$$TRAVIS_RUST_VERSION != xnightly ] || cargo test --features heapsize
.PHONY: test

View File

@ -1,10 +0,0 @@
rust-url
========
[![Travis build Status](https://travis-ci.org/servo/rust-url.svg?branch=master)](https://travis-ci.org/servo/rust-url) [![Appveyor build status](https://ci.appveyor.com/api/projects/status/ulkqx2xcemyod6xa?svg=true)](https://ci.appveyor.com/project/Manishearth/rust-url)
URL library for Rust, based on the [URL Standard](https://url.spec.whatwg.org/).
[Documentation](https://docs.rs/url/)
Please see [UPGRADING.md](https://github.com/servo/rust-url/blob/master/UPGRADING.md) if you are upgrading from 0.x to 1.x.

View File

@ -1,263 +0,0 @@
# Guide to upgrading from url 0.x to 1.x
* The fields of `Url` are now private because the `Url` constructor, parser,
and setters maintain invariants that could be violated if you were to set the fields directly.
Instead of accessing, for example, `url.scheme`, use the getter method, such as `url.scheme()`.
Instead of assigning directly to a field, for example `url.scheme = "https".to_string()`,
use the setter method, such as `url.set_scheme("https").unwrap()`.
(Some setters validate the new value and return a `Result` that must be used).
* The methods of `Url` now return `&str` instead of `String`,
thus reducing allocations and making serialization cheap.
* The `path()` method on `url::Url` instances used to return `Option<&[String]>`;
now it returns `&str`.
If you would like functionality more similar to the old behavior of `path()`,
use `path_segments()` that returns `Option<str::Split<char>>`.
Before upgrading:
```rust
let issue_list_url = Url::parse(
"https://github.com/rust-lang/rust/issues?labels=E-easy&state=open"
).unwrap();
assert_eq!(issue_list_url.path(), Some(&["rust-lang".to_string(),
"rust".to_string(),
"issues".to_string()][..]));
```
After upgrading:
```rust
let issue_list_url = Url::parse(
"https://github.com/rust-lang/rust/issues?labels=E-easy&state=open"
).unwrap();
assert_eq!(issue_list_url.path(), "/rust-lang/rust/issues");
assert_eq!(issue_list_url.path_segments().map(|c| c.collect::<Vec<_>>()),
Some(vec!["rust-lang", "rust", "issues"]));
```
* The `path_mut()` method on `url::Url` instances that allowed modification of a URL's path
has been replaced by `path_segments_mut()`.
Before upgrading:
```rust
let mut url = Url::parse("https://github.com/rust-lang/rust").unwrap();
url.path_mut().unwrap().push("issues");
```
After upgrading:
```rust
let mut url = Url::parse("https://github.com/rust-lang/rust").unwrap();
url.path_segments_mut().unwrap().push("issues");
```
* The `domain_mut()` method on `url::Url` instances that allowed modification of a URL's domain
has been replaced by `set_host()` and `set_ip_host()`.
* The `host()` method on `url::Url` instances used to return `Option<&Host>`;
now it returns `Option<Host<&str>>`.
The `serialize_host()` method that returned `Option<String>`
has been replaced by the `host_str()` method that returns `Option<&str>`.
* The `serialize()` method on `url::Url` instances that returned `String`
has been replaced by an `as_str()` method that returns `&str`.
Before upgrading:
```rust
let this_document = Url::parse("http://servo.github.io/rust-url/url/index.html").unwrap();
assert_eq!(this_document.serialize(), "http://servo.github.io/rust-url/url/index.html".to_string());
```
After upgrading:
```rust
let this_document = Url::parse("http://servo.github.io/rust-url/url/index.html").unwrap();
assert_eq!(this_document.as_str(), "http://servo.github.io/rust-url/url/index.html");
```
* `url::UrlParser` has been replaced by `url::Url::parse()` and `url::Url::join()`.
Before upgrading:
```rust
let this_document = Url::parse("http://servo.github.io/rust-url/url/index.html").unwrap();
let css_url = UrlParser::new().base_url(&this_document).parse("../main.css").unwrap();
assert_eq!(css_url.serialize(), "http://servo.github.io/rust-url/main.css".to_string());
```
After upgrading:
```rust
let this_document = Url::parse("http://servo.github.io/rust-url/url/index.html").unwrap();
let css_url = this_document.join("../main.css").unwrap();
assert_eq!(css_url.as_str(), "http://servo.github.io/rust-url/main.css");
```
* `url::parse_path()` and `url::UrlParser::parse_path()` have been removed without replacement.
As a workaround, you can parse a base URL with `url::Url::parse()`, `join()` the path onto it, and then ignore the base.
Before upgrading:
```rust
let (path, query, fragment) = url::parse_path("/foo/bar/../baz?q=42").unwrap();
assert_eq!(path, vec!["foo".to_string(), "baz".to_string()]);
assert_eq!(query, Some("q=42".to_string()));
assert_eq!(fragment, None);
```
After upgrading:
```rust
let base = Url::parse("http://example.com").unwrap();
let with_path = base.join("/foo/bar/../baz?q=42").unwrap();
assert_eq!(with_path.path(), "/foo/baz");
assert_eq!(with_path.query(), Some("q=42"));
assert_eq!(with_path.fragment(), None);
```
* The `url::form_urlencoded::serialize()` method
has been replaced with the `url::form_urlencoded::Serializer` struct.
Instead of calling `serialize()` with key/value pairs,
create a new `Serializer` with a new string,
call the `extend_pairs()` method on the `Serializer` instance with the key/value pairs as the argument,
then call `finish()`.
Before upgrading:
```rust
let form = url::form_urlencoded::serialize(form.iter().map(|(k, v)| {
(&k[..], &v[..])
}));
```
After upgrading:
```rust
let form = url::form_urlencoded::Serializer::new(String::new()).extend_pairs(
form.iter().map(|(k, v)| { (&k[..], &v[..]) })
).finish();
```
* The `set_query_from_pairs()` method on `url::Url` instances that took key/value pairs
has been replaced with `query_pairs_mut()`, which allows you to modify the `url::Url`'s query pairs.
Before upgrading:
```rust
let mut url = Url::parse("https://duckduckgo.com/").unwrap();
let pairs = vec![
("q", "test"),
("ia", "images"),
];
url.set_query_from_pairs(pairs.iter().map(|&(k, v)| {
(&k[..], &v[..])
}));
```
After upgrading:
```rust
let mut url = Url::parse("https://duckduckgo.com/").unwrap();
let pairs = vec![
("q", "test"),
("ia", "images"),
];
url.query_pairs_mut().clear().extend_pairs(
pairs.iter().map(|&(k, v)| { (&k[..], &v[..]) })
);
```
* `url::SchemeData`, its variants `Relative` and `NonRelative`,
and the struct `url::RelativeSchemeData` have been removed.
Instead of matching on these variants
to determine if you have a URL in a relative scheme such as HTTP
versus a URL in a non-relative scheme such as data,
use the `cannot_be_a_base()` method to determine which kind you have.
Before upgrading:
```rust
match url.scheme_data {
url::SchemeData::Relative(..) => {}
url::SchemeData::NonRelative(..) => {
return Err(human(format!("`{}` must have relative scheme \
data: {}", field, url)))
}
}
```
After upgrading:
```rust
if url.cannot_be_a_base() {
return Err(human(format!("`{}` must have relative scheme \
data: {}", field, url)))
}
```
* The functions `url::whatwg_scheme_type_mapper()`, the `SchemeType` enum,
and the `scheme_type_mapper()` method on `url::UrlParser` instances have been removed.
`SchemeType` had a method for getting the `default_port()`;
to replicate this functionality, use the method `port_or_known_default()` on `url::Url` instances.
The `port_or_default()` method on `url::Url` instances has been removed;
use `port_or_known_default()` instead.
Before upgrading:
```rust
let port = match whatwg_scheme_type_mapper(&url.scheme) {
SchemeType::Relative(port) => port,
_ => return Err(format!("Invalid special scheme: `{}`",
raw_url.scheme)),
};
```
After upgrading:
```rust
let port = match url.port_or_known_default() {
Some(port) => port,
_ => return Err(format!("Invalid special scheme: `{}`",
url.scheme())),
};
```
* The following formatting utilities have been removed without replacement;
look at their linked previous implementations
if you would like to replicate the functionality in your code:
* [`url::format::PathFormatter`](https://github.com/servo/rust-url/pull/176/commits/9e759f18726c8e1343162922b87163d4dd08fe3c#diff-0bb16ac13b75e9b568fa4aff61b0e71dL24)
* [`url::format::UserInfoFormatter`](https://github.com/servo/rust-url/pull/176/commits/9e759f18726c8e1343162922b87163d4dd08fe3c#diff-0bb16ac13b75e9b568fa4aff61b0e71dL50)
* [`url::format::UrlNoFragmentFormatter`](https://github.com/servo/rust-url/pull/176/commits/9e759f18726c8e1343162922b87163d4dd08fe3c#diff-0bb16ac13b75e9b568fa4aff61b0e71dL70)
* `url::percent_encoding::percent_decode()` used to have a return type of `Vec<u8>`;
now it returns an iterator of decoded `u8` bytes that also implements `Into<Cow<[u8]>>`.
Use `.into().to_owned()` to obtain a `Vec<u8>`.
(`.collect()` also works but might not be as efficient.)
* The `url::percent_encoding::EncodeSet` struct and constant instances
used with `url::percent_encoding::percent_encode()`
have been changed to structs that implement the trait `url::percent_encoding::EncodeSet`.
* `SIMPLE_ENCODE_SET`, `QUERY_ENCODE_SET`, `DEFAULT_ENCODE_SET`,
and `USERINFO_ENCODE_SET` have the same behavior.
* `USERNAME_ENCODE_SET` and `PASSWORD_ENCODE_SET` have been removed;
use `USERINFO_ENCODE_SET` instead.
* `HTTP_VALUE_ENCODE_SET` has been removed;
an implementation of it in the new types can be found [in hyper's source](
https://github.com/hyperium/hyper/blob/67436c5bf615cf5a55a71e32b788afef5985570e/src/header/parsing.rs#L131-L138)
if you need to replicate this functionality in your code.
* `FORM_URLENCODED_ENCODE_SET` has been removed;
instead, use the functionality in `url::form_urlencoded`.
* `PATH_SEGMENT_ENCODE_SET` has been added for use on '/'-separated path segments.
* `url::percent_encoding::percent_decode_to()` has been removed.
Use `url::percent_encoding::percent_decode()` which returns an iterator.
You can then use the iterator's `collect()` method
or give it to some data structure's `extend()` method.
* A number of `ParseError` variants have changed.
[See the documentation for the current set](http://servo.github.io/rust-url/url/enum.ParseError.html).
* `url::OpaqueOrigin::new()` and `url::Origin::UID(OpaqueOrigin)`
have been replaced by `url::Origin::new_opaque()` and `url::Origin::Opaque(OpaqueOrigin)`, respectively.

View File

@ -1,13 +0,0 @@
install:
- ps: Start-FileDownload 'https://static.rust-lang.org/dist/rust-nightly-i686-pc-windows-gnu.exe'
- rust-nightly-i686-pc-windows-gnu.exe /VERYSILENT /NORESTART /DIR="C:\Program Files (x86)\Rust"
- SET PATH=%PATH%;C:\Program Files (x86)\Rust\bin
- rustc -V
- cargo -V
- git submodule update --init --recursive
build: false
test_script:
- cargo build
- cargo test --verbose

View File

View File

@ -1,3 +0,0 @@
<meta http-equiv="refresh" content="0; url=https://docs.rs/url/">
<link rel="canonical" href="https://docs.rs/url/">
<a href="https://docs.rs/url/">Moved to docs.rs</a>

View File

@ -1,3 +0,0 @@
<meta http-equiv="refresh" content="0; url=https://docs.rs/url/">
<link rel="canonical" href="https://docs.rs/url/">
<a href="https://docs.rs/url/">Moved to docs.rs</a>

Binary file not shown.

Before

Width:  |  Height:  |  Size: 7.6 KiB

View File

@ -1,14 +0,0 @@
* standalone path parsing?
* Test setters
* Test trim C0/space
* Test remove tab & newline
#[test]
fn test_path_segments() {
let mut url = Url::parse("http://example.net").unwrap();
url.push_path_segment("foo").unwrap();
url.extend_path_segments(&["bar", "b/az"]).unwrap();
assert_eq!(url.as_str(), "http://example.net/foo");
}

View File

@ -1,135 +0,0 @@
// Copyright 2013-2014 The rust-url developers.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//! Abstraction that conditionally compiles either to rust-encoding,
//! or to only support UTF-8.
#[cfg(feature = "query_encoding")] extern crate encoding;
use std::borrow::Cow;
#[cfg(feature = "query_encoding")] use self::encoding::types::{DecoderTrap, EncoderTrap};
#[cfg(feature = "query_encoding")] use self::encoding::label::encoding_from_whatwg_label;
#[cfg(feature = "query_encoding")] pub use self::encoding::types::EncodingRef;
/// Character-encoding override backed by the `encoding` crate.
///
/// This definition is only compiled when the `query_encoding` Cargo feature is enabled.
#[cfg(feature = "query_encoding")]
#[derive(Copy, Clone)]
pub struct EncodingOverride {
/// The encoding to use instead of UTF-8. `None` means UTF-8.
encoding: Option<EncodingRef>
}
#[cfg(feature = "query_encoding")]
impl EncodingOverride {
    /// Build an override from an optional encoding; `None` selects UTF-8.
    pub fn from_opt_encoding(encoding: Option<EncodingRef>) -> Self {
        match encoding {
            Some(requested) => Self::from_encoding(requested),
            None => Self::utf8(),
        }
    }

    /// Build an override from `encoding`.
    ///
    /// An encoding whose name is `"utf-8"` is stored as `None`, so that
    /// UTF-8 has a single representation.
    pub fn from_encoding(encoding: EncodingRef) -> Self {
        let stored = if encoding.name() != "utf-8" { Some(encoding) } else { None };
        EncodingOverride { encoding: stored }
    }

    /// The UTF-8 override.
    #[inline]
    pub fn utf8() -> Self {
        EncodingOverride { encoding: None }
    }

    /// Look up an encoding by its WHATWG label, given as raw bytes.
    ///
    /// Returns `None` when the label is not valid UTF-8 or is not a known label.
    pub fn lookup(label: &[u8]) -> Option<Self> {
        // Strict decoding is enough (no need for String::from_utf8_lossy)
        // since no encoding label contains U+FFFD
        // https://encoding.spec.whatwg.org/#names-and-labels
        let label = match ::std::str::from_utf8(label) {
            Ok(text) => text,
            Err(_) => return None,
        };
        encoding_from_whatwg_label(label).map(Self::from_encoding)
    }

    /// https://encoding.spec.whatwg.org/#get-an-output-encoding
    ///
    /// UTF-16LE and UTF-16BE are mapped to UTF-8; anything else is returned unchanged.
    pub fn to_output_encoding(self) -> Self {
        match self.encoding {
            Some(encoding) if matches!(encoding.name(), "utf-16le" | "utf-16be") => Self::utf8(),
            _ => self,
        }
    }

    /// Whether this override is UTF-8.
    pub fn is_utf8(&self) -> bool {
        match self.encoding {
            None => true,
            Some(_) => false,
        }
    }

    /// The name of the selected encoding; `"utf-8"` when no override is stored.
    pub fn name(&self) -> &'static str {
        self.encoding.map(|encoding| encoding.name()).unwrap_or("utf-8")
    }

    /// Decode `input` bytes into a string using the selected encoding.
    pub fn decode<'a>(&self, input: Cow<'a, [u8]>) -> Cow<'a, str> {
        if let Some(encoding) = self.encoding {
            // `encoding.decode` never returns `Err` when called with `DecoderTrap::Replace`
            Cow::Owned(encoding.decode(&input, DecoderTrap::Replace).unwrap())
        } else {
            decode_utf8_lossy(input)
        }
    }

    /// Encode the `input` string into bytes using the selected encoding.
    pub fn encode<'a>(&self, input: Cow<'a, str>) -> Cow<'a, [u8]> {
        if let Some(encoding) = self.encoding {
            // `encoding.encode` never returns `Err` when called with `EncoderTrap::NcrEscape`
            Cow::Owned(encoding.encode(&input, EncoderTrap::NcrEscape).unwrap())
        } else {
            encode_utf8(input)
        }
    }
}
/// UTF-8-only stand-in for `EncodingOverride`,
/// compiled when the `query_encoding` Cargo feature is disabled.
#[cfg(not(feature = "query_encoding"))]
#[derive(Copy, Clone)]
pub struct EncodingOverride;
#[cfg(not(feature = "query_encoding"))]
impl EncodingOverride {
/// The UTF-8 override (the only value this type has without `query_encoding`).
#[inline]
pub fn utf8() -> Self {
EncodingOverride
}
/// Decode `input` bytes as UTF-8 by delegating to `decode_utf8_lossy`.
pub fn decode<'a>(&self, input: Cow<'a, [u8]>) -> Cow<'a, str> {
decode_utf8_lossy(input)
}
/// Encode the `input` string as UTF-8 bytes by delegating to `encode_utf8`.
pub fn encode<'a>(&self, input: Cow<'a, str>) -> Cow<'a, [u8]> {
encode_utf8(input)
}
}
/// Decode `input` as UTF-8, replacing invalid sequences with U+FFFD.
///
/// Borrowed input is only copied when a replacement is actually needed.
/// Owned input that is already valid UTF-8 is converted in place,
/// reusing the existing `Vec` allocation.
pub fn decode_utf8_lossy(input: Cow<[u8]>) -> Cow<str> {
    match input {
        Cow::Borrowed(bytes) => String::from_utf8_lossy(bytes),
        Cow::Owned(bytes) => {
            // `String::from_utf8` validates the bytes and, on success, takes over
            // the `Vec` allocation without copying, so no `unsafe` is required.
            match String::from_utf8(bytes) {
                Ok(valid) => Cow::Owned(valid),
                // Invalid UTF-8: recover the bytes and build the lossy copy.
                Err(error) => {
                    let bytes = error.into_bytes();
                    Cow::Owned(String::from_utf8_lossy(&bytes).into_owned())
                }
            }
        }
    }
}
/// View or convert a UTF-8 string as its raw bytes.
///
/// A borrowed string stays borrowed; an owned string hands over its buffer without copying.
pub fn encode_utf8(input: Cow<str>) -> Cow<[u8]> {
    if let Cow::Borrowed(text) = input {
        return Cow::Borrowed(text.as_bytes());
    }
    // Only the owned case reaches this point, so `into_owned` does not clone.
    Cow::Owned(input.into_owned().into_bytes())
}

View File

@ -1,364 +0,0 @@
// Copyright 2013-2016 The rust-url developers.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//! Parser and serializer for the [`application/x-www-form-urlencoded` syntax](
//! http://url.spec.whatwg.org/#application/x-www-form-urlencoded),
//! as used by HTML forms.
//!
//! Converts between a string (such as a URL's query string)
//! and a sequence of (name, value) pairs.
use encoding::EncodingOverride;
use percent_encoding::{percent_encode_byte, percent_decode};
use std::borrow::{Borrow, Cow};
use std::str;
/// Convert a byte string in the `application/x-www-form-urlencoded` syntax
/// into an iterator of (name, value) pairs.
///
/// Use `parse(input.as_bytes())` to parse a `&str` string.
///
/// The names and values are percent-decoded. For instance, `%23first=%25try%25` will be
/// converted to `[("#first", "%try%")]`.
///
/// The returned iterator uses the UTF-8 encoding override.
#[inline]
pub fn parse(input: &[u8]) -> Parse {
Parse {
input: input,
encoding: EncodingOverride::utf8(),
}
}
/// Convert a byte string in the `application/x-www-form-urlencoded` syntax
/// into an iterator of (name, value) pairs, with an optional encoding override.
///
/// Use `parse(input.as_bytes())` to parse a `&str` string.
///
/// This function is only available if the `query_encoding` Cargo feature is enabled.
///
/// Arguments:
///
/// * `encoding_override`: The character encoding that each name and value is decoded as
///   after percent-decoding. Defaults to UTF-8.
/// * `use_charset`: The *use _charset_ flag*. If in doubt, set to `false`.
///
/// Returns `Err(())` when the override is not UTF-8 and `input` is not pure ASCII.
#[cfg(feature = "query_encoding")]
pub fn parse_with_encoding<'a>(input: &'a [u8],
                               encoding_override: Option<::encoding::EncodingRef>,
                               use_charset: bool)
                               -> Result<Parse<'a>, ()> {
    use std::ascii::AsciiExt;

    let mut encoding = EncodingOverride::from_opt_encoding(encoding_override);
    if !encoding.is_utf8() && !input.is_ascii() {
        return Err(())
    }
    if use_charset {
        // The first `_charset_=<label>` sequence whose label is a known encoding wins.
        // No '+' in "_charset_" to replace with ' ', and no replacing of '+' in the value
        // either, since no encoding label contains '+' or ' ':
        // https://encoding.spec.whatwg.org/#names-and-labels
        let from_charset = input
            .split(|&b| b == b'&')
            .filter(|sequence| sequence.starts_with(b"_charset_="))
            .filter_map(|sequence| EncodingOverride::lookup(&sequence[b"_charset_=".len()..]))
            .next();
        if let Some(found) = from_charset {
            encoding = found;
        }
    }
    Ok(Parse {
        input: input,
        encoding: encoding,
    })
}
/// The return type of `parse()`.
///
/// An iterator of percent-decoded (name, value) pairs.
#[derive(Copy, Clone)]
pub struct Parse<'a> {
/// The part of the input that has not been parsed yet.
input: &'a [u8],
/// The encoding used to decode names and values.
encoding: EncodingOverride,
}
impl<'a> Iterator for Parse<'a> {
    type Item = (Cow<'a, str>, Cow<'a, str>);

    /// Yield the next decoded (name, value) pair, skipping empty `&`-separated sequences.
    fn next(&mut self) -> Option<Self::Item> {
        while !self.input.is_empty() {
            let input = self.input;
            // Cut off everything up to the first '&'; the rest is kept for the next call.
            let (sequence, rest) = match input.iter().position(|&b| b == b'&') {
                Some(at) => (&input[..at], &input[at + 1..]),
                None => (input, &[][..]),
            };
            self.input = rest;
            if sequence.is_empty() {
                continue
            }
            // Split at the first '='; a sequence without '=' has an empty value.
            let (name, value) = match sequence.iter().position(|&b| b == b'=') {
                Some(at) => (&sequence[..at], &sequence[at + 1..]),
                None => (sequence, &[][..]),
            };
            return Some((
                decode(name, self.encoding),
                decode(value, self.encoding),
            ))
        }
        None
    }
}
/// Decode one name or value: replace '+' with ' ', percent-decode,
/// then decode the resulting bytes with `encoding`.
fn decode(input: &[u8], encoding: EncodingOverride) -> Cow<str> {
    let plus_replaced = replace_plus(input);
    let percent_decoded = percent_decode(&plus_replaced).if_any();
    let bytes = match percent_decoded {
        Some(decoded) => Cow::Owned(decoded),
        // `if_any` produced nothing, so keep the bytes we already have.
        None => plus_replaced,
    };
    encoding.decode(bytes)
}
/// Replace every b'+' with b' '.
///
/// The input is returned borrowed when it contains no b'+'.
fn replace_plus<'a>(input: &'a [u8]) -> Cow<'a, [u8]> {
    if !input.contains(&b'+') {
        return Cow::Borrowed(input);
    }
    let replaced = input
        .iter()
        .map(|&byte| if byte == b'+' { b' ' } else { byte })
        .collect();
    Cow::Owned(replaced)
}
impl<'a> Parse<'a> {
/// Return a new iterator that yields pairs of `String` instead of pairs of `Cow<str>`.
///
/// The returned `ParseIntoOwned` wraps this iterator and continues from its current position.
pub fn into_owned(self) -> ParseIntoOwned<'a> {
ParseIntoOwned { inner: self }
}
}
/// Like `Parse`, but yields pairs of `String` instead of pairs of `Cow<str>`.
pub struct ParseIntoOwned<'a> {
/// The wrapped iterator whose items are converted to owned strings.
inner: Parse<'a>
}
impl<'a> Iterator for ParseIntoOwned<'a> {
    type Item = (String, String);

    /// Take the next pair from the wrapped `Parse` and convert both halves to `String`.
    fn next(&mut self) -> Option<Self::Item> {
        match self.inner.next() {
            Some((name, value)) => Some((name.into_owned(), value.into_owned())),
            None => None,
        }
    }
}
/// The [`application/x-www-form-urlencoded` byte serializer](
/// https://url.spec.whatwg.org/#concept-urlencoded-byte-serializer).
///
/// Return an iterator of `&str` slices.
///
/// The iterator borrows `input`; nothing is serialized until it is advanced.
pub fn byte_serialize(input: &[u8]) -> ByteSerialize {
ByteSerialize {
bytes: input,
}
}
/// Return value of `byte_serialize()`.
pub struct ByteSerialize<'a> {
/// The input bytes that have not been serialized yet.
bytes: &'a [u8],
}
/// Whether `byte` is emitted as-is by the byte serializer:
/// ASCII letters and digits, plus `*`, `-`, `.` and `_`.
fn byte_serialized_unchanged(byte: u8) -> bool {
    let is_digit = b'0' <= byte && byte <= b'9';
    let is_upper = b'A' <= byte && byte <= b'Z';
    let is_lower = b'a' <= byte && byte <= b'z';
    let is_punctuation = byte == b'*' || byte == b'-' || byte == b'.' || byte == b'_';
    is_digit || is_upper || is_lower || is_punctuation
}
impl<'a> Iterator for ByteSerialize<'a> {
    type Item = &'a str;

    /// Yield the next output chunk: either the replacement for a single byte
    /// that must be changed, or the longest run of bytes that are kept as-is.
    fn next(&mut self) -> Option<&'a str> {
        let bytes = self.bytes;
        let first = match bytes.first() {
            Some(&byte) => byte,
            None => return None,
        };
        if !byte_serialized_unchanged(first) {
            self.bytes = &bytes[1..];
            return Some(if first == b' ' { "+" } else { percent_encode_byte(first) })
        }
        // `first` is unchanged, so the run has at least one byte;
        // it ends at the first byte that needs to be changed (or at the end of input).
        let run_length = bytes.iter()
            .position(|&b| !byte_serialized_unchanged(b))
            .unwrap_or(bytes.len());
        let (unchanged_slice, remaining) = bytes.split_at(run_length);
        self.bytes = remaining;
        // Every byte in `unchanged_slice` passed `byte_serialized_unchanged`,
        // which only accepts ASCII bytes, so the slice is valid UTF-8.
        Some(unsafe { str::from_utf8_unchecked(unchanged_slice) })
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        // At least one chunk if any input is left, at most one chunk per byte.
        match self.bytes.len() {
            0 => (0, Some(0)),
            remaining => (1, Some(remaining)),
        }
    }
}
/// The [`application/x-www-form-urlencoded` serializer](
/// https://url.spec.whatwg.org/#concept-urlencoded-serializer).
pub struct Serializer<T: Target> {
// `None` once `finish()` has taken the target; the appending methods panic after that.
target: Option<T>,
// Byte offset in the target string at which this serializer's pairs begin.
start_position: usize,
// Character encoding applied to names and values before they are byte-serialized.
encoding: EncodingOverride,
}
/// A destination that a `Serializer` can write into.
pub trait Target {
/// Give mutable access to the `String` that serialized pairs are appended to.
fn as_mut_string(&mut self) -> &mut String;
/// Consume the target and return its finished form.
fn finish(self) -> Self::Finished;
/// The type returned by `finish()`.
type Finished;
}
// An owned `String` is its own buffer and is returned as-is when finished.
impl Target for String {
fn as_mut_string(&mut self) -> &mut String { self }
fn finish(self) -> Self { self }
type Finished = Self;
}
// A borrowed `String` is written to in place; finishing hands the same borrow back.
impl<'a> Target for &'a mut String {
fn as_mut_string(&mut self) -> &mut String { &mut **self }
fn finish(self) -> Self { self }
type Finished = Self;
}
// `as_mut_string` here exposes the internal serialization of an `Url`,
// which should not be exposed to users.
// We achieve that by not giving users direct access to `UrlQuery`:
// * Its fields are private
// (and so can not be constructed with struct literal syntax outside of this crate),
// * It has no constructor
// * It is only visible (on the type level) to users in the return type of
// `Url::query_pairs_mut` which is `Serializer<UrlQuery>`
// * `Serializer` keeps its target in a private field
// * Unlike in other `Target` impls, `UrlQuery::finish` does not return `Self`:
// it returns the `&mut Url` that the `UrlQuery` was holding.
impl<'a> Target for ::UrlQuery<'a> {
fn as_mut_string(&mut self) -> &mut String { &mut self.url.serialization }
fn finish(self) -> &'a mut ::Url { self.url }
type Finished = &'a mut ::Url;
}
impl<T: Target> Serializer<T> {
/// Create a new `application/x-www-form-urlencoded` serializer for the given target.
///
/// If the target is non-empty,
/// its content is assumed to already be in `application/x-www-form-urlencoded` syntax.
pub fn new(target: T) -> Self {
Self::for_suffix(target, 0)
}
/// Create a new `application/x-www-form-urlencoded` serializer
/// for a suffix of the given target.
///
/// If that suffix is non-empty,
/// its content is assumed to already be in `application/x-www-form-urlencoded` syntax.
///
/// Panics if `start_position` is not a valid position to slice the target string from.
pub fn for_suffix(mut target: T, start_position: usize) -> Self {
// The slice is taken only for its bounds check; its value is discarded.
&target.as_mut_string()[start_position..]; // Panic if out of bounds
Serializer {
target: Some(target),
start_position: start_position,
encoding: EncodingOverride::utf8(),
}
}
/// Remove any existing name/value pair.
///
/// The target string is truncated back to the start position;
/// anything before that position is kept.
///
/// Panics if called after `.finish()`.
pub fn clear(&mut self) -> &mut Self {
string(&mut self.target).truncate(self.start_position);
self
}
/// Set the character encoding to be used for names and values before percent-encoding.
#[cfg(feature = "query_encoding")]
pub fn encoding_override(&mut self, new: Option<::encoding::EncodingRef>) -> &mut Self {
self.encoding = EncodingOverride::from_opt_encoding(new).to_output_encoding();
self
}
/// Serialize and append a name/value pair.
///
/// Panics if called after `.finish()`.
pub fn append_pair(&mut self, name: &str, value: &str) -> &mut Self {
append_pair(string(&mut self.target), self.start_position, self.encoding, name, value);
self
}
/// Serialize and append a number of name/value pairs.
///
/// This simply calls `append_pair` repeatedly.
/// This can be more convenient, so the user doesn't need to introduce a block
/// to limit the scope of the `Serializer`'s borrow of its string.
///
/// Panics if called after `.finish()`.
pub fn extend_pairs<I, K, V>(&mut self, iter: I) -> &mut Self
where I: IntoIterator, I::Item: Borrow<(K, V)>, K: AsRef<str>, V: AsRef<str> {
{
// Inner block: the borrow of `self.target` must end before `self` is returned.
let string = string(&mut self.target);
for pair in iter {
let &(ref k, ref v) = pair.borrow();
append_pair(string, self.start_position, self.encoding, k.as_ref(), v.as_ref());
}
}
self
}
/// Add a name/value pair whose name is `_charset_`
/// and whose value is the character encoding's name.
/// (See the `encoding_override()` method.)
///
/// Panics if called after `.finish()`.
#[cfg(feature = "query_encoding")]
pub fn append_charset(&mut self) -> &mut Self {
{
// Inner block: the borrow of `self.target` must end before `self` is returned.
let string = string(&mut self.target);
append_separator_if_needed(string, self.start_position);
string.push_str("_charset_=");
string.push_str(self.encoding.name());
}
self
}
/// If this serializer was constructed with a string, take and return that string.
///
/// ```rust
/// use url::form_urlencoded;
/// let encoded: String = form_urlencoded::Serializer::new(String::new())
/// .append_pair("foo", "bar & baz")
/// .append_pair("saison", "Été+hiver")
/// .finish();
/// assert_eq!(encoded, "foo=bar+%26+baz&saison=%C3%89t%C3%A9%2Bhiver");
/// ```
///
/// Panics if called more than once.
pub fn finish(&mut self) -> T::Finished {
self.target.take().expect("url::form_urlencoded::Serializer double finish").finish()
}
}
/// Append a `&` separator to `string`,
/// but only when something has already been written after `start_position`.
fn append_separator_if_needed(string: &mut String, start_position: usize) {
    let has_content = start_position < string.len();
    if has_content {
        string.push_str("&");
    }
}
// Borrow the `String` behind a serializer's target.
// Panics with "url::form_urlencoded::Serializer finished" when the target is `None`,
// i.e. after it has been taken.
fn string<T: Target>(target: &mut Option<T>) -> &mut String {
target.as_mut().expect("url::form_urlencoded::Serializer finished").as_mut_string()
}
// Append `name=value` to `string`, preceded by `&` when earlier content exists
// past `start_position`. Both halves are first converted with `encoding`
// and then passed through `byte_serialize`.
fn append_pair(string: &mut String, start_position: usize, encoding: EncodingOverride,
name: &str, value: &str) {
append_separator_if_needed(string, start_position);
string.extend(byte_serialize(&encoding.encode(name.into())));
string.push('=');
string.extend(byte_serialize(&encoding.encode(value.into())));
}

View File

@ -1,418 +0,0 @@
// Copyright 2013-2016 The rust-url developers.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
#[cfg(feature = "heapsize")] use heapsize::HeapSizeOf;
use std::cmp;
use std::fmt::{self, Formatter};
use std::io;
use std::net::{Ipv4Addr, Ipv6Addr, SocketAddr, SocketAddrV4, SocketAddrV6, ToSocketAddrs};
use std::vec;
use parser::{ParseResult, ParseError};
use percent_encoding::percent_decode;
use idna;
/// A `Copy` summary of a host: the kind of host and, for IP addresses, the address itself.
/// Unlike `Host`, the `Domain` variant carries no string.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum HostInternal {
/// No host.
None,
/// A domain name; the name itself is not stored here.
Domain,
/// An IPv4 address.
Ipv4(Ipv4Addr),
/// An IPv6 address.
Ipv6(Ipv6Addr),
}
// Registered as having no heap-allocated children.
#[cfg(feature = "heapsize")]
known_heap_size!(0, HostInternal);
impl<S> From<Host<S>> for HostInternal {
/// Convert a `Host` to its internal summary:
/// IP addresses are carried over, a domain's string is dropped.
fn from(host: Host<S>) -> HostInternal {
match host {
Host::Domain(_) => HostInternal::Domain,
Host::Ipv4(address) => HostInternal::Ipv4(address),
Host::Ipv6(address) => HostInternal::Ipv6(address),
}
}
}
/// The host name of an URL.
///
/// The type parameter `S` is the string type that holds a domain name;
/// it defaults to `String`.
#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub enum Host<S=String> {
/// A DNS domain name, as '.' dot-separated labels.
/// Non-ASCII labels are encoded in punycode per IDNA.
Domain(S),
/// An IPv4 address.
/// `Url::host_str` returns the serialization of this address,
/// as four decimal integers separated by `.` dots.
Ipv4(Ipv4Addr),
/// An IPv6 address.
/// `Url::host_str` returns the serialization of that address between `[` and `]` brackets,
/// in the format per [RFC 5952 *A Recommendation
/// for IPv6 Address Text Representation*](https://tools.ietf.org/html/rfc5952):
/// lowercase hexadecimal with maximal `::` compression.
Ipv6(Ipv6Addr),
}
#[cfg(feature = "heapsize")]
impl<S: HeapSizeOf> HeapSizeOf for Host<S> {
/// Only a domain string is measured; the IP address variants report 0.
fn heap_size_of_children(&self) -> usize {
match *self {
Host::Domain(ref s) => s.heap_size_of_children(),
_ => 0,
}
}
}
impl<'a> Host<&'a str> {
/// Return a copy of `self` that owns an allocated `String` but does not borrow an `&Url`.
///
/// Only the `Domain` variant allocates; IP addresses are copied.
pub fn to_owned(&self) -> Host<String> {
match *self {
Host::Domain(domain) => Host::Domain(domain.to_owned()),
Host::Ipv4(address) => Host::Ipv4(address),
Host::Ipv6(address) => Host::Ipv6(address),
}
}
}
impl Host<String> {
    /// Parse a host: either an IPv6 address in [] square brackets, or a domain.
    ///
    /// An input starting with `[` must also end with `]`, and what lies between
    /// is parsed as an IPv6 address. Any other input is percent-decoded,
    /// converted to ASCII with IDNA, checked for forbidden characters,
    /// and finally returned as an IPv4 address if it parses as one, or as a domain otherwise.
    ///
    /// https://url.spec.whatwg.org/#host-parsing
    pub fn parse(input: &str) -> Result<Self, ParseError> {
        if input.starts_with("[") {
            return if input.ends_with("]") {
                parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6)
            } else {
                Err(ParseError::InvalidIpv6Address)
            };
        }

        let decoded = percent_decode(input.as_bytes()).decode_utf8_lossy();
        let domain = try!(idna::domain_to_ascii(&decoded));

        let has_forbidden_character = domain.chars().any(|c| matches!(c,
            '\0' | '\t' | '\n' | '\r' | ' ' | '#' | '%' | '/' | ':' | '?' | '@' | '[' | '\\' | ']'
        ));
        if has_forbidden_character {
            return Err(ParseError::InvalidDomainCharacter)
        }

        match try!(parse_ipv4addr(&domain)) {
            Some(address) => Ok(Host::Ipv4(address)),
            None => Ok(Host::Domain(domain.into())),
        }
    }
}
impl<S: AsRef<str>> fmt::Display for Host<S> {
/// Write the domain as-is, an IPv4 address with its own `Display`,
/// and an IPv6 address via `write_ipv6` between `[` and `]` brackets.
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
match *self {
Host::Domain(ref domain) => domain.as_ref().fmt(f),
Host::Ipv4(ref addr) => addr.fmt(f),
Host::Ipv6(ref addr) => {
try!(f.write_str("["));
try!(write_ipv6(addr, f));
f.write_str("]")
}
}
}
}
/// A host paired with a port number.
///
/// This mostly exists because coherence rules don't allow us to implement
/// `ToSocketAddrs for (Host<S>, u16)`.
pub struct HostAndPort<S=String> {
pub host: Host<S>,
pub port: u16,
}
impl<'a> HostAndPort<&'a str> {
/// Return a copy of `self` that owns an allocated `String` but does not borrow an `&Url`.
///
/// The host is converted with `Host::to_owned`; the port is copied.
pub fn to_owned(&self) -> HostAndPort<String> {
HostAndPort {
host: self.host.to_owned(),
port: self.port
}
}
}
impl<S: AsRef<str>> ToSocketAddrs for HostAndPort<S> {
type Iter = SocketAddrs;
/// Resolve to socket addresses.
///
/// A domain is resolved through the standard library's `(&str, u16)` implementation
/// and any error from it is returned. An IP address yields exactly one socket address,
/// built directly from the address and port.
fn to_socket_addrs(&self) -> io::Result<Self::Iter> {
let port = self.port;
match self.host {
Host::Domain(ref domain) => Ok(SocketAddrs {
// FIXME: use std::net::lookup_host when it's stable.
state: SocketAddrsState::Domain(try!((domain.as_ref(), port).to_socket_addrs()))
}),
Host::Ipv4(address) => Ok(SocketAddrs {
state: SocketAddrsState::One(SocketAddr::V4(SocketAddrV4::new(address, port)))
}),
Host::Ipv6(address) => Ok(SocketAddrs {
// The two trailing zeros are the flow info and scope id arguments.
state: SocketAddrsState::One(SocketAddr::V6(SocketAddrV6::new(address, port, 0, 0)))
}),
}
}
}
/// Socket addresses for an URL.
///
/// This is the iterator type returned by `HostAndPort::to_socket_addrs`.
pub struct SocketAddrs {
// What is left to yield.
state: SocketAddrsState
}
// Iteration state of `SocketAddrs`.
enum SocketAddrsState {
// Addresses obtained by resolving a domain name.
Domain(vec::IntoIter<SocketAddr>),
// A single address that has not been yielded yet.
One(SocketAddr),
// The single address has been yielded; nothing is left.
Done,
}
impl Iterator for SocketAddrs {
type Item = SocketAddr;
/// Yield the next resolved address, or the single stored address exactly once.
fn next(&mut self) -> Option<SocketAddr> {
match self.state {
SocketAddrsState::Domain(ref mut iter) => iter.next(),
SocketAddrsState::One(s) => {
// Switch to `Done` so that the address is not yielded a second time.
self.state = SocketAddrsState::Done;
Some(s)
}
SocketAddrsState::Done => None
}
}
}
// Write `addr` as colon-separated lowercase hexadecimal segments,
// replacing the range of segments reported by `longest_zero_sequence` with `::`.
// The surrounding `[` and `]` brackets are not written here.
fn write_ipv6(addr: &Ipv6Addr, f: &mut Formatter) -> fmt::Result {
let segments = addr.segments();
let (compress_start, compress_end) = longest_zero_sequence(&segments);
let mut i = 0;
while i < 8 {
if i == compress_start {
// The previous segment, if any, already wrote one ':' after itself,
// so a second ':' is only needed when the compressed range starts the address.
try!(f.write_str(":"));
if i == 0 {
try!(f.write_str(":"));
}
if compress_end < 8 {
// Skip over the compressed segments.
i = compress_end;
} else {
// The compressed range extends to the end of the address.
break;
}
}
try!(write!(f, "{:x}", segments[i as usize]));
if i < 7 {
try!(f.write_str(":"));
}
i += 1;
}
Ok(())
}
/// Find the run of zero pieces that the IPv6 serializer should compress to `::`.
///
/// Returns `(start, end)`: the half-open range of piece indices covered by the first
/// longest run of consecutive zero pieces.
///
/// Only runs of at least two pieces qualify. A lone zero piece is never compressed,
/// as required by the IPv6 serializer of the URL Standard
/// (https://url.spec.whatwg.org/#concept-ipv6-serializer) and by RFC 5952 section 4.2.2.
/// When no run qualifies the result is `(-1, -2)`, which matches no piece index.
fn longest_zero_sequence(pieces: &[u16; 8]) -> (isize, isize) {
    let mut longest = -1;
    let mut longest_length = -1;
    let mut start = -1;
    // Close the run that began at `start` (if one is open) and ends just before `$end`,
    // remembering it if it is strictly longer than the best one so far.
    macro_rules! finish_sequence(
        ($end: expr) => {
            if start >= 0 {
                let length = $end - start;
                if length > longest_length {
                    longest = start;
                    longest_length = length;
                }
            }
        };
    );
    for i in 0..8 {
        if pieces[i as usize] == 0 {
            if start < 0 {
                start = i;
            }
        } else {
            finish_sequence!(i);
            start = -1;
        }
    }
    // A run may extend all the way to the last piece.
    finish_sequence!(8);
    // Ignore lone zeroes (and the case where there is no zero at all).
    if longest_length < 2 {
        (-1, -2)
    } else {
        (longest, longest + longest_length)
    }
}
/// Parse one dot-separated part of an IPv4 address.
///
/// A `0x` or `0X` prefix selects hexadecimal; a leading `0` on an input of
/// two or more characters selects octal; anything else is decimal.
/// Once the prefix is removed, an empty remainder counts as zero
/// and a leading `+` sign is rejected.
fn parse_ipv4number(mut input: &str) -> Result<u32, ()> {
    let radix = if input.starts_with("0x") || input.starts_with("0X") {
        input = &input[2..];
        16
    } else if input.len() >= 2 && input.starts_with("0") {
        input = &input[1..];
        8
    } else {
        10
    };
    if input.is_empty() {
        return Ok(0);
    }
    // `from_str_radix` would accept a leading plus sign; this parser does not.
    if input.starts_with("+") {
        return Err(());
    }
    u32::from_str_radix(input, radix).map_err(|_| ())
}
/// Try to interpret `input` as an IPv4 address.
///
/// Returns `Ok(None)` when `input` does not have the shape of an IPv4 address:
/// it is empty, has more than four parts, has an empty part, or has a part
/// that is not a number. Returns `Err(ParseError::InvalidIpv4Address)` when every part
/// is a number but one of them is out of range.
fn parse_ipv4addr(input: &str) -> ParseResult<Option<Ipv4Addr>> {
    if input.is_empty() {
        return Ok(None);
    }
    let mut parts: Vec<&str> = input.split('.').collect();
    // A single trailing dot is tolerated.
    if parts.last() == Some(&"") {
        parts.pop();
    }
    if parts.len() > 4 {
        return Ok(None);
    }
    let mut numbers: Vec<u32> = Vec::new();
    for part in parts {
        // Checked first: `parse_ipv4number` would read an empty part as zero.
        if part.is_empty() {
            return Ok(None);
        }
        match parse_ipv4number(part) {
            Ok(number) => numbers.push(number),
            Err(()) => return Ok(None),
        }
    }
    // The last number fills all the bytes not taken by the preceding ones.
    let last = numbers.pop().expect("a non-empty list of numbers");
    // Equivalent to: last >= 256 ** (4 - numbers.len())
    if last > u32::max_value() >> (8 * numbers.len() as u32) {
        return Err(ParseError::InvalidIpv4Address);
    }
    if numbers.iter().any(|&number| number > 255) {
        return Err(ParseError::InvalidIpv4Address);
    }
    // Each preceding number takes one byte, most significant first.
    let ipv4 = numbers.iter().enumerate().fold(last, |address, (index, &number)| {
        address + (number << (8 * (3 - index as u32)))
    });
    Ok(Some(Ipv4Addr::from(ipv4)))
}
/// Parse the text of an IPv6 address, without its `[` and `]` brackets.
///
/// Follows the IPv6 parser of the URL Standard (https://url.spec.whatwg.org/#concept-ipv6-parser):
/// up to eight colon-separated pieces of one to four hexadecimal digits, at most one `::`
/// standing for one or more zero pieces, and optionally a trailing dotted-decimal IPv4 address
/// that fills the last two pieces.
///
/// Returns `Err(ParseError::InvalidIpv6Address)` for any malformed input.
fn parse_ipv6addr(input: &str) -> ParseResult<Ipv6Addr> {
    let input = input.as_bytes();
    let len = input.len();
    let mut is_ip_v4 = false;
    let mut pieces = [0, 0, 0, 0, 0, 0, 0, 0];
    // Index of the piece being filled.
    let mut piece_pointer = 0;
    // Index of the first piece after `::`, once a `::` has been seen.
    let mut compress_pointer = None;
    let mut i = 0;

    if len < 2 {
        return Err(ParseError::InvalidIpv6Address)
    }

    // A leading colon is only valid as part of a leading `::`.
    if input[0] == b':' {
        if input[1] != b':' {
            return Err(ParseError::InvalidIpv6Address)
        }
        i = 2;
        piece_pointer = 1;
        compress_pointer = Some(1);
    }

    while i < len {
        if piece_pointer == 8 {
            return Err(ParseError::InvalidIpv6Address)
        }
        if input[i] == b':' {
            // A colon in piece position is the second half of `::`; only one is allowed.
            if compress_pointer.is_some() {
                return Err(ParseError::InvalidIpv6Address)
            }
            i += 1;
            piece_pointer += 1;
            compress_pointer = Some(piece_pointer);
            continue
        }
        // Read up to four hexadecimal digits.
        let start = i;
        let end = cmp::min(len, start + 4);
        let mut value = 0u16;
        while i < end {
            match (input[i] as char).to_digit(16) {
                Some(digit) => {
                    value = value * 0x10 + digit as u16;
                    i += 1;
                },
                None => break
            }
        }
        if i < len {
            match input[i] {
                b'.' => {
                    if i == start {
                        return Err(ParseError::InvalidIpv6Address)
                    }
                    // The digits just read belong to an IPv4 address:
                    // rewind and parse them again as decimal below.
                    i = start;
                    is_ip_v4 = true;
                },
                b':' => {
                    i += 1;
                    // A single trailing colon is invalid.
                    if i == len {
                        return Err(ParseError::InvalidIpv6Address)
                    }
                },
                _ => return Err(ParseError::InvalidIpv6Address)
            }
        }
        if is_ip_v4 {
            break
        }
        pieces[piece_pointer] = value;
        piece_pointer += 1;
    }

    if is_ip_v4 {
        // The IPv4 address needs two free pieces.
        if piece_pointer > 6 {
            return Err(ParseError::InvalidIpv6Address)
        }
        let mut numbers_seen = 0;
        while i < len {
            // Every number but the first must be preceded by a dot,
            // and nothing may follow the fourth number.
            if numbers_seen > 0 {
                if numbers_seen < 4 && input[i] == b'.' {
                    i += 1;
                } else {
                    return Err(ParseError::InvalidIpv6Address)
                }
            }
            let mut ipv4_piece: Option<u16> = None;
            while i < len {
                let digit = match (input[i] as char).to_digit(10) {
                    Some(digit) => digit as u16,
                    None => break
                };
                let updated = match ipv4_piece {
                    None => digit,
                    // A number with more than one digit must not start with zero.
                    Some(0) => return Err(ParseError::InvalidIpv6Address),
                    Some(previous) => previous * 10 + digit,
                };
                if updated > 255 {
                    return Err(ParseError::InvalidIpv6Address)
                }
                ipv4_piece = Some(updated);
                i += 1;
            }
            let value = match ipv4_piece {
                Some(value) => value,
                // No digit where a number was expected.
                None => return Err(ParseError::InvalidIpv6Address)
            };
            // Two consecutive 8-bit numbers make up one 16-bit piece.
            pieces[piece_pointer] = pieces[piece_pointer] * 0x100 + value;
            numbers_seen += 1;
            if numbers_seen == 2 || numbers_seen == 4 {
                piece_pointer += 1;
            }
        }
        if numbers_seen != 4 {
            return Err(ParseError::InvalidIpv6Address)
        }
    }

    match compress_pointer {
        Some(compress_pointer) => {
            // Move the pieces that follow `::` to the end of the address.
            // This must be a swap: when source and destination are the same index
            // (the `::` stands for a single piece), copying and then zeroing
            // the source would erase the piece.
            let mut swaps = piece_pointer - compress_pointer;
            piece_pointer = 7;
            while swaps > 0 {
                pieces.swap(piece_pointer, compress_pointer + swaps - 1);
                swaps -= 1;
                piece_pointer -= 1;
            }
        }
        // Without `::`, all eight pieces must have been given.
        _ => if piece_pointer != 8 {
            return Err(ParseError::InvalidIpv6Address)
        }
    }
    Ok(Ipv6Addr::new(pieces[0], pieces[1], pieces[2], pieces[3],
                     pieces[4], pieces[5], pieces[6], pieces[7]))
}

File diff suppressed because it is too large Load Diff

View File

@ -1,114 +0,0 @@
// Copyright 2016 The rust-url developers.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
#[cfg(feature = "heapsize")] use heapsize::HeapSizeOf;
use host::Host;
use idna::domain_to_unicode;
use parser::default_port;
use std::sync::atomic::{AtomicUsize, ATOMIC_USIZE_INIT, Ordering};
use Url;
/// Compute the origin of `url`.
///
/// * `blob`: the path is itself parsed as an URL and that URL's origin is returned,
///   or a new opaque origin if the path does not parse.
/// * `ftp`, `gopher`, `http`, `https`, `ws`, `wss`: a (scheme, host, port) tuple,
///   where the port falls back to the known default for the scheme.
/// * Anything else, including `file`: a new opaque origin.
pub fn url_origin(url: &Url) -> Origin {
    let scheme = url.scheme();
    match scheme {
        "blob" => match Url::parse(url.path()) {
            Ok(ref inner) => url_origin(inner),
            Err(_) => Origin::new_opaque(),
        },
        "ftp" | "gopher" | "http" | "https" | "ws" | "wss" => {
            let host = url.host().unwrap().to_owned();
            let port = url.port_or_known_default().unwrap();
            Origin::Tuple(scheme.to_owned(), host, port)
        }
        // TODO: Figure out what to do if the scheme is a file
        "file" => Origin::new_opaque(),
        _ => Origin::new_opaque(),
    }
}
/// The origin of an URL
///
/// Two tuple origins are equal when their scheme, host and port are all equal.
#[derive(PartialEq, Eq, Clone, Debug)]
pub enum Origin {
/// A globally unique identifier
Opaque(OpaqueOrigin),
/// Consists of the URL's scheme, host and port
Tuple(String, Host<String>, u16)
}
#[cfg(feature = "heapsize")]
impl HeapSizeOf for Origin {
/// A tuple origin reports the heap size of its scheme and host; an opaque origin reports 0.
fn heap_size_of_children(&self) -> usize {
match *self {
Origin::Tuple(ref scheme, ref host, _) => {
scheme.heap_size_of_children() +
host.heap_size_of_children()
},
_ => 0,
}
}
}
impl Origin {
    /// Creates a new opaque origin that is only equal to itself.
    ///
    /// Each call takes the next value of a process-wide atomic counter.
    pub fn new_opaque() -> Origin {
        static COUNTER: AtomicUsize = ATOMIC_USIZE_INIT;
        let id = COUNTER.fetch_add(1, Ordering::SeqCst);
        Origin::Opaque(OpaqueOrigin(id))
    }

    /// Return whether this origin is a (scheme, host, port) tuple
    /// (as opposed to an opaque origin).
    pub fn is_tuple(&self) -> bool {
        match *self {
            Origin::Tuple(..) => true,
            Origin::Opaque(..) => false,
        }
    }

    /// https://html.spec.whatwg.org/multipage/#ascii-serialisation-of-an-origin
    ///
    /// An opaque origin serializes to `"null"`.
    /// The port is left out when it is the default port for the scheme.
    pub fn ascii_serialization(&self) -> String {
        let (scheme, host, port) = match *self {
            Origin::Opaque(_) => return "null".to_owned(),
            Origin::Tuple(ref scheme, ref host, port) => (scheme, host, port),
        };
        if default_port(scheme) == Some(port) {
            format!("{}://{}", scheme, host)
        } else {
            format!("{}://{}:{}", scheme, host, port)
        }
    }

    /// https://html.spec.whatwg.org/multipage/#unicode-serialisation-of-an-origin
    ///
    /// Same as the ASCII serialization, except that a domain host
    /// is first converted to Unicode (conversion errors are ignored).
    pub fn unicode_serialization(&self) -> String {
        let (scheme, host, port) = match *self {
            Origin::Opaque(_) => return "null".to_owned(),
            Origin::Tuple(ref scheme, ref host, port) => (scheme, host, port),
        };
        let unicode_host = match *host {
            Host::Domain(ref domain) => {
                let (unicode_domain, _errors) = domain_to_unicode(domain);
                Host::Domain(unicode_domain)
            }
            _ => host.clone(),
        };
        if default_port(scheme) == Some(port) {
            format!("{}://{}", scheme, unicode_host)
        } else {
            format!("{}://{}:{}", scheme, unicode_host, port)
        }
    }
}
/// Opaque identifier for URLs that have file or other schemes
///
/// The wrapped number is the counter value handed out by `Origin::new_opaque`.
#[derive(Eq, PartialEq, Clone, Debug)]
pub struct OpaqueOrigin(usize);
// Registered as having no heap-allocated children.
#[cfg(feature = "heapsize")]
known_heap_size!(0, OpaqueOrigin);

File diff suppressed because it is too large Load Diff

View File

@ -1,187 +0,0 @@
// Copyright 2016 The rust-url developers.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use parser::{self, SchemeType, to_u32};
use std::str;
use Url;
/// Exposes methods to manipulate the path of an URL that is not cannot-be-base.
///
/// The path always starts with a `/` slash, and is made of slash-separated segments.
/// There is always at least one segment (which may be the empty string).
///
/// Examples:
///
/// ```rust
/// # use url::Url;
/// let mut url = Url::parse("mailto:me@example.com").unwrap();
/// assert!(url.path_segments_mut().is_err());
///
/// let mut url = Url::parse("http://example.net/foo/index.html").unwrap();
/// url.path_segments_mut().unwrap().pop().push("img").push("2/100%.png");
/// assert_eq!(url.as_str(), "http://example.net/foo/img/2%2F100%25.png");
/// ```
pub struct PathSegmentsMut<'a> {
// The URL being edited.
url: &'a mut Url,
// Index in the serialization just past the path's initial slash.
after_first_slash: usize,
// What `Url::take_after_path` returned; handed back to the URL on drop.
after_path: String,
// Length of the serialization right after `take_after_path` was called.
old_after_path_position: u32,
}
// Not re-exported outside the crate
//
// Takes what `take_after_path` returns out of the URL and keeps it aside,
// so that the path sits at the end of the serialization while it is being edited.
// The `Drop` impl hands it back through `restore_after_path`.
pub fn new(url: &mut Url) -> PathSegmentsMut {
let after_path = url.take_after_path();
let old_after_path_position = to_u32(url.serialization.len()).unwrap();
// The path is expected to start with a slash.
debug_assert!(url.byte_at(url.path_start) == b'/');
PathSegmentsMut {
after_first_slash: url.path_start as usize + "/".len(),
url: url,
old_after_path_position: old_after_path_position,
after_path: after_path,
}
}
impl<'a> Drop for PathSegmentsMut<'a> {
/// Hand the part that was set aside at construction back to the URL.
fn drop(&mut self) {
self.url.restore_after_path(self.old_after_path_position, &self.after_path)
}
}
impl<'a> PathSegmentsMut<'a> {
/// Remove all segments in the path, leaving the minimal `url.path() == "/"`.
///
/// Returns `&mut Self` so that method calls can be chained.
///
/// Example:
///
/// ```rust
/// # use url::Url;
/// let mut url = Url::parse("https://github.com/servo/rust-url/").unwrap();
/// url.path_segments_mut().unwrap().clear().push("logout");
/// assert_eq!(url.as_str(), "https://github.com/logout");
/// ```
pub fn clear(&mut self) -> &mut Self {
self.url.serialization.truncate(self.after_first_slash);
self
}
/// Remove the last segment of this URL's path if it is empty,
/// except if there was only one segment to begin with.
///
/// In other words, remove one path trailing slash, if any,
/// unless it is also the initial slash (so this does nothing if `url.path() == "/"`).
///
/// Returns `&mut Self` so that method calls can be chained.
///
/// Example:
///
/// ```rust
/// # use url::Url;
/// let mut url = Url::parse("https://github.com/servo/rust-url/").unwrap();
/// url.path_segments_mut().unwrap().push("pulls");
/// assert_eq!(url.as_str(), "https://github.com/servo/rust-url//pulls");
///
/// let mut url = Url::parse("https://github.com/servo/rust-url/").unwrap();
/// url.path_segments_mut().unwrap().pop_if_empty().push("pulls");
/// assert_eq!(url.as_str(), "https://github.com/servo/rust-url/pulls");
/// ```
pub fn pop_if_empty(&mut self) -> &mut Self {
// Only what follows the initial slash is examined, so that slash is never removed.
if self.url.serialization[self.after_first_slash..].ends_with('/') {
self.url.serialization.pop();
}
self
}
/// Remove the last segment of this URL's path.
///
/// If the path only has one segment, make it empty such that `url.path() == "/"`.
///
/// Returns `&mut Self` so that method calls can be chained.
pub fn pop(&mut self) -> &mut Self {
// With no slash after the initial one, truncate right after the initial slash.
let last_slash = self.url.serialization[self.after_first_slash..].rfind('/').unwrap_or(0);
self.url.serialization.truncate(self.after_first_slash + last_slash);
self
}
/// Append the given segment at the end of this URL's path.
///
/// See the documentation for `.extend()`.
///
/// Returns `&mut Self` so that method calls can be chained.
pub fn push(&mut self, segment: &str) -> &mut Self {
self.extend(Some(segment))
}
/// Append each segment from the given iterator at the end of this URL's path.
///
/// Each segment is percent-encoded like in `Url::parse` or `Url::join`,
/// except that `%` and `/` characters are also encoded (to `%25` and `%2F`).
/// (This is unlike `Url::parse` where `%` is left as-is in case some of the input
/// is already percent-encoded, and `/` denotes a path segment separator.)
///
/// Note that, in addition to slashes between new segments,
/// this always adds a slash between the existing path and the new segments
/// *except* if the existing path is `"/"`.
/// If the previous last segment was empty (if the path had a trailing slash)
/// the path after `.extend()` will contain two consecutive slashes.
/// If that is undesired, call `.pop_if_empty()` first.
///
/// To obtain a behavior similar to `Url::join`, call `.pop()` unconditionally first.
///
/// Returns `&mut Self` so that method calls can be chained.
///
/// Example:
///
/// ```rust
/// # use url::Url;
/// let mut url = Url::parse("https://github.com/").unwrap();
/// let org = "servo";
/// let repo = "rust-url";
/// let issue_number = "188";
/// url.path_segments_mut().unwrap().extend(&[org, repo, "issues", issue_number]);
/// assert_eq!(url.as_str(), "https://github.com/servo/rust-url/issues/188");
/// ```
///
/// In order to make sure that parsing the serialization of an URL gives the same URL,
/// a segment is ignored if it is `"."` or `".."`:
///
/// ```rust
/// # use url::Url;
/// let mut url = Url::parse("https://github.com/servo").unwrap();
/// url.path_segments_mut().unwrap().extend(&["..", "rust-url", ".", "pulls"]);
/// assert_eq!(url.as_str(), "https://github.com/servo/rust-url/pulls");
/// ```
pub fn extend<I>(&mut self, segments: I) -> &mut Self
where I: IntoIterator, I::Item: AsRef<str> {
let scheme_type = SchemeType::from(self.url.scheme());
let path_start = self.url.path_start as usize;
self.url.mutate(|parser| {
parser.context = parser::Context::PathSegmentSetter;
for segment in segments {
let segment = segment.as_ref();
if matches!(segment, "." | "..") {
continue
}
// Add a separating slash unless the path so far is only the initial slash.
if parser.serialization.len() > path_start + 1 {
parser.serialization.push('/');
}
let mut has_host = true; // FIXME account for this?
parser.parse_path(scheme_type, &mut has_host, path_start,
parser::Input::new(segment));
}
});
self
}
/// For internal testing, not part of the public API.
#[doc(hidden)]
pub fn assert_url_invariants(&mut self) -> &mut Self {
self.url.assert_invariants();
self
}
}

View File

@ -1,344 +0,0 @@
// Copyright 2013-2016 The rust-url developers.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use encoding;
use std::ascii::AsciiExt;
use std::borrow::Cow;
use std::fmt;
use std::slice;
use std::str;
/// Represents a set of characters / bytes that should be percent-encoded.
///
/// See [encode sets specification](http://url.spec.whatwg.org/#simple-encode-set).
///
/// Different characters need to be encoded in different parts of an URL.
/// For example, a literal `?` question mark in an URL's path would indicate
/// the start of the query string.
/// A question mark meant to be part of the path therefore needs to be percent-encoded.
/// In the query string however, a question mark does not have any special meaning
/// and does not need to be percent-encoded.
///
/// A few sets are defined in this module.
/// Use the [`define_encode_set!`](../macro.define_encode_set!.html) macro to define different ones.
pub trait EncodeSet: Clone {
/// Return whether `byte` belongs to the set, i.e. must be percent-encoded.
///
/// Called with UTF-8 bytes rather than code points.
/// Should return true for all non-ASCII bytes.
fn contains(&self, byte: u8) -> bool;
}
/// Define a new struct
/// that implements the [`EncodeSet`](percent_encoding/trait.EncodeSet.html) trait,
/// for use in [`percent_encode()`](percent_encoding/fn.percent_encode.html)
/// and related functions.
///
/// Parameters are characters to include in the set in addition to those of the base set.
/// See [encode sets specification](http://url.spec.whatwg.org/#simple-encode-set).
///
/// Example
/// =======
///
/// ```rust
/// #[macro_use] extern crate url;
/// use url::percent_encoding::{utf8_percent_encode, SIMPLE_ENCODE_SET};
/// define_encode_set! {
/// /// This encode set is used in the URL parser for query strings.
/// pub QUERY_ENCODE_SET = [SIMPLE_ENCODE_SET] | {' ', '"', '#', '<', '>'}
/// }
/// # fn main() {
/// assert_eq!(utf8_percent_encode("foo bar", QUERY_ENCODE_SET).collect::<String>(), "foo%20bar");
/// # }
/// ```
#[macro_export]
macro_rules! define_encode_set {
($(#[$attr: meta])* pub $name: ident = [$base_set: expr] | {$($ch: pat),*}) => {
$(#[$attr])*
#[derive(Copy, Clone)]
#[allow(non_camel_case_types)]
pub struct $name;
impl $crate::percent_encoding::EncodeSet for $name {
#[inline]
fn contains(&self, byte: u8) -> bool {
// The listed characters are matched first; any other byte is deferred to the base set.
match byte as char {
$(
$ch => true,
)*
_ => $base_set.contains(byte)
}
}
}
}
}
/// This encode set is used for the path of cannot-be-a-base URLs.
///
/// It contains every byte below 0x20 and every byte above 0x7E.
#[derive(Copy, Clone)]
#[allow(non_camel_case_types)]
pub struct SIMPLE_ENCODE_SET;
impl EncodeSet for SIMPLE_ENCODE_SET {
#[inline]
fn contains(&self, byte: u8) -> bool {
byte < 0x20 || byte > 0x7E
}
}
define_encode_set! {
/// This encode set is used in the URL parser for query strings.
pub QUERY_ENCODE_SET = [SIMPLE_ENCODE_SET] | {' ', '"', '#', '<', '>'}
}
define_encode_set! {
/// This encode set is used for path components.
pub DEFAULT_ENCODE_SET = [QUERY_ENCODE_SET] | {'`', '?', '{', '}'}
}
define_encode_set! {
/// This encode set is used on '/'-separated path segments.
pub PATH_SEGMENT_ENCODE_SET = [DEFAULT_ENCODE_SET] | {'%', '/'}
}
define_encode_set! {
/// This encode set is used for username and password.
pub USERINFO_ENCODE_SET = [DEFAULT_ENCODE_SET] | {
'/', ':', ';', '=', '@', '[', '\\', ']', '^', '|'
}
}
/// Return the percent-encoding of the given byte: `%` followed by two uppercase
/// hexadecimal digits.
///
/// This is unconditional, unlike `percent_encode()` which uses an encode set.
pub fn percent_encode_byte(byte: u8) -> &'static str {
    // All 256 three-character encodings, back to back and in byte order.
    const ENCODED: &'static str = "\
        %00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F\
        %10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F\
        %20%21%22%23%24%25%26%27%28%29%2A%2B%2C%2D%2E%2F\
        %30%31%32%33%34%35%36%37%38%39%3A%3B%3C%3D%3E%3F\
        %40%41%42%43%44%45%46%47%48%49%4A%4B%4C%4D%4E%4F\
        %50%51%52%53%54%55%56%57%58%59%5A%5B%5C%5D%5E%5F\
        %60%61%62%63%64%65%66%67%68%69%6A%6B%6C%6D%6E%6F\
        %70%71%72%73%74%75%76%77%78%79%7A%7B%7C%7D%7E%7F\
        %80%81%82%83%84%85%86%87%88%89%8A%8B%8C%8D%8E%8F\
        %90%91%92%93%94%95%96%97%98%99%9A%9B%9C%9D%9E%9F\
        %A0%A1%A2%A3%A4%A5%A6%A7%A8%A9%AA%AB%AC%AD%AE%AF\
        %B0%B1%B2%B3%B4%B5%B6%B7%B8%B9%BA%BB%BC%BD%BE%BF\
        %C0%C1%C2%C3%C4%C5%C6%C7%C8%C9%CA%CB%CC%CD%CE%CF\
        %D0%D1%D2%D3%D4%D5%D6%D7%D8%D9%DA%DB%DC%DD%DE%DF\
        %E0%E1%E2%E3%E4%E5%E6%E7%E8%E9%EA%EB%EC%ED%EE%EF\
        %F0%F1%F2%F3%F4%F5%F6%F7%F8%F9%FA%FB%FC%FD%FE%FF\
        ";
    let start = 3 * usize::from(byte);
    let end = start + 3;
    &ENCODED[start..end]
}
/// Percent-encode the given bytes with the given encode set.
///
/// The encode set defines which bytes (in addition to non-ASCII and controls)
/// need to be percent-encoded.
/// The choice of this set depends on context.
/// For example, `?` needs to be encoded in an URL path but not in a query string.
///
/// The return value is an iterator of `&str` slices (so it has a `.collect::<String>()` method)
/// that also implements `Display` and `Into<Cow<str>>`.
/// The latter returns `Cow::Borrowed` when none of the bytes in `input`
/// are in the given encode set.
#[inline]
pub fn percent_encode<E: EncodeSet>(input: &[u8], encode_set: E) -> PercentEncode<E> {
PercentEncode {
bytes: input,
encode_set: encode_set,
}
}
/// Percent-encode the UTF-8 encoding of the given string.
///
/// This is `percent_encode()` applied to the bytes of `input`.
/// See `percent_encode()` for how to use the return value.
#[inline]
pub fn utf8_percent_encode<E: EncodeSet>(input: &str, encode_set: E) -> PercentEncode<E> {
percent_encode(input.as_bytes(), encode_set)
}
/// The return type of `percent_encode()` and `utf8_percent_encode()`.
#[derive(Clone)]
pub struct PercentEncode<'a, E: EncodeSet> {
// The input bytes that have not been encoded yet.
bytes: &'a [u8],
// Decides which bytes get percent-encoded.
encode_set: E,
}
impl<'a, E: EncodeSet> Iterator for PercentEncode<'a, E> {
    type Item = &'a str;

    /// Yield the next chunk of encoded output.
    ///
    /// A byte in the encode set is yielded on its own, percent-encoded.
    /// A run of bytes outside the set is yielded as one slice borrowed from the input.
    ///
    /// Panics if the encode set lets a non-ASCII byte through unencoded.
    fn next(&mut self) -> Option<&'a str> {
        let (&first_byte, remaining) = match self.bytes.split_first() {
            Some(pair) => pair,
            None => return None,
        };
        if self.encode_set.contains(first_byte) {
            self.bytes = remaining;
            return Some(percent_encode_byte(first_byte));
        }
        assert!(first_byte.is_ascii());

        // Count `first_byte` plus the bytes after it that also stay unchanged.
        let mut unchanged_len = 1;
        for &byte in remaining {
            if self.encode_set.contains(byte) {
                break
            }
            assert!(byte.is_ascii());
            unchanged_len += 1;
        }
        let (unchanged_slice, rest) = self.bytes.split_at(unchanged_len);
        self.bytes = rest;
        // Every byte of the run was asserted to be ASCII above, so it is valid UTF-8.
        Some(unsafe { str::from_utf8_unchecked(unchanged_slice) })
    }

    /// At least one item remains if any input is left, and at most one item per input byte.
    fn size_hint(&self) -> (usize, Option<usize>) {
        match self.bytes.len() {
            0 => (0, Some(0)),
            len => (1, Some(len)),
        }
    }
}
impl<'a, E: EncodeSet> fmt::Display for PercentEncode<'a, E> {
/// Write every encoded chunk to `formatter`.
/// A clone is iterated, so `self` is left untouched.
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
for c in (*self).clone() {
try!(formatter.write_str(c))
}
Ok(())
}
}
impl<'a, E: EncodeSet> From<PercentEncode<'a, E>> for Cow<'a, str> {
    /// Collect the encoded output, borrowing whenever possible.
    ///
    /// No allocation happens when the iterator yields zero or one chunk;
    /// with two or more chunks they are concatenated into a new `String`.
    fn from(mut iter: PercentEncode<'a, E>) -> Self {
        let first = match iter.next() {
            Some(chunk) => chunk,
            None => return "".into(),
        };
        let second = match iter.next() {
            Some(chunk) => chunk,
            None => return first.into(),
        };
        let mut string = first.to_owned();
        string.push_str(second);
        string.extend(iter);
        string.into()
    }
}
/// Percent-decode the given bytes.
///
/// The return value is an iterator of decoded `u8` bytes
/// that also implements `Into<Cow<[u8]>>`
/// (which returns `Cow::Borrowed` when `input` contains no percent-encoded sequence)
/// and has `decode_utf8()` and `decode_utf8_lossy()` methods.
#[inline]
pub fn percent_decode<'a>(input: &'a [u8]) -> PercentDecode<'a> {
PercentDecode {
bytes: input.iter()
}
}
/// The return type of `percent_decode()`.
///
/// An iterator of decoded bytes: every `%` followed by two hexadecimal digits
/// yields the byte they encode, and every other input byte is yielded unchanged.
#[derive(Clone)]
pub struct PercentDecode<'a> {
    /// Input bytes that have not been decoded yet.
    bytes: slice::Iter<'a, u8>,
}

/// Try to read the two hexadecimal digits that follow a `%` sign.
///
/// On success the two digits are consumed from `iter` and the byte they encode is returned.
/// Otherwise `iter` is restored to where it was and `None` is returned.
fn after_percent_sign(iter: &mut slice::Iter<u8>) -> Option<u8> {
    let initial_iter = iter.clone();
    let h = iter.next().and_then(|&b| (b as char).to_digit(16));
    let l = iter.next().and_then(|&b| (b as char).to_digit(16));
    if let (Some(h), Some(l)) = (h, l) {
        Some(h as u8 * 0x10 + l as u8)
    } else {
        // Not a well-formed escape: rewind so that the following bytes are not lost.
        *iter = initial_iter;
        None
    }
}

impl<'a> Iterator for PercentDecode<'a> {
    type Item = u8;

    /// Yield the next decoded byte.
    ///
    /// A `%` that is not followed by two hexadecimal digits is yielded as a literal `%`.
    fn next(&mut self) -> Option<u8> {
        self.bytes.next().map(|&byte| {
            if byte == b'%' {
                after_percent_sign(&mut self.bytes).unwrap_or(byte)
            } else {
                byte
            }
        })
    }

    /// Bounds on the number of bytes still to be yielded.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let bytes = self.bytes.len();
        // Each item consumes between one input byte (a literal) and three (`%XX`),
        // so at least `ceil(bytes / 3)` and at most `bytes` items remain.
        ((bytes + 2) / 3, Some(bytes))
    }
}
impl<'a> From<PercentDecode<'a>> for Cow<'a, [u8]> {
    /// Borrow the input when decoding would not change it, otherwise own the decoded bytes.
    fn from(iter: PercentDecode<'a>) -> Self {
        if let Some(decoded) = iter.if_any() {
            Cow::Owned(decoded)
        } else {
            Cow::Borrowed(iter.bytes.as_slice())
        }
    }
}
impl<'a> PercentDecode<'a> {
    /// If the percent-decoding is different from the input, return it as a new bytes vector.
    ///
    /// Returns `None` when the remaining input contains no `%` followed by two hexadecimal digits,
    /// in which case decoding would yield the input bytes unchanged.
    pub fn if_any(&self) -> Option<Vec<u8>> {
        let mut bytes_iter = self.bytes.clone();
        while bytes_iter.find(|&&b| b == b'%').is_some() {
            if let Some(decoded_byte) = after_percent_sign(&mut bytes_iter) {
                let initial_bytes = self.bytes.as_slice();
                // `bytes_iter` is now just past the first `%XX` sequence (3 bytes);
                // everything before that sequence is copied as-is.
                let unchanged_bytes_len = initial_bytes.len() - bytes_iter.len() - 3;
                let mut decoded = initial_bytes[..unchanged_bytes_len].to_owned();
                decoded.push(decoded_byte);
                // Decode the rest of the input with the regular iterator.
                decoded.extend(PercentDecode {
                    bytes: bytes_iter
                });
                return Some(decoded)
            }
        }
        // Nothing to decode
        None
    }

    /// Decode the result of percent-decoding as UTF-8.
    ///
    /// This returns `Err` when the percent-decoded bytes are not well-formed in UTF-8.
    pub fn decode_utf8(self) -> Result<Cow<'a, str>, str::Utf8Error> {
        // `self` is owned, so it is converted directly instead of through a clone.
        let decoded: Cow<'a, [u8]> = self.into();
        match decoded {
            Cow::Borrowed(bytes) => str::from_utf8(bytes).map(Cow::Borrowed),
            Cow::Owned(bytes) => String::from_utf8(bytes)
                .map(Cow::Owned)
                .map_err(|error| error.utf8_error()),
        }
    }

    /// Decode the result of percent-decoding as UTF-8, lossily.
    ///
    /// Invalid UTF-8 percent-encoded byte sequences will be replaced with U+FFFD,
    /// the replacement character.
    pub fn decode_utf8_lossy(self) -> Cow<'a, str> {
        encoding::decode_utf8_lossy(self.into())
    }
}

View File

@ -1,217 +0,0 @@
// Copyright 2016 The rust-url developers.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//! Getters and setters for URL components implemented per https://url.spec.whatwg.org/#api
//!
//! Unless you need to be interoperable with web browsers,
//! you probably want to use `Url` methods instead.
use {Url, Position, Host, ParseError, idna};
use parser::{Parser, SchemeType, default_port, Context, Input};
/// https://url.spec.whatwg.org/#dom-url-domaintoascii
///
/// Returns the domain produced by `Host::parse`, or the empty string
/// when parsing fails or produces something other than a domain.
pub fn domain_to_ascii(domain: &str) -> String {
    if let Ok(Host::Domain(ascii_domain)) = Host::parse(domain) {
        ascii_domain
    } else {
        String::new()
    }
}
/// https://url.spec.whatwg.org/#dom-url-domaintounicode
///
/// Returns the empty string when `Host::parse` fails or produces something other than a domain.
pub fn domain_to_unicode(domain: &str) -> String {
    if let Ok(Host::Domain(ref ascii_domain)) = Host::parse(domain) {
        // The second tuple field (conversion errors) is deliberately ignored.
        idna::domain_to_unicode(ascii_domain).0
    } else {
        String::new()
    }
}
/// Getter for https://url.spec.whatwg.org/#dom-url-href
///
/// Returns the serialization of `url`, as given by `Url::as_str`.
pub fn href(url: &Url) -> &str {
    url.as_str()
}
/// Setter for https://url.spec.whatwg.org/#dom-url-href
pub fn set_href(url: &mut Url, value: &str) -> Result<(), ParseError> {
*url = try!(Url::parse(value));
Ok(())
}
/// Getter for https://url.spec.whatwg.org/#dom-url-origin
///
/// Returns the Unicode serialization of the origin of `url`.
pub fn origin(url: &Url) -> String {
    url.origin().unicode_serialization()
}
/// Getter for https://url.spec.whatwg.org/#dom-url-protocol
///
/// Returns the start of the serialization up to and including the `:` that follows the scheme.
#[inline]
pub fn protocol(url: &Url) -> &str {
    &url.as_str()[..url.scheme().len() + ":".len()]
}
/// Setter for https://url.spec.whatwg.org/#dom-url-protocol
///
/// Only the part of `new_protocol` before its first `:` (if any) is passed to `Url::set_scheme`.
pub fn set_protocol(url: &mut Url, mut new_protocol: &str) -> Result<(), ()> {
    // The scheme state in the spec ignores everything after the first `:`,
    // but `set_scheme` errors if there is more.
    // `split` always yields at least one item: the text before the first `:`.
    new_protocol = new_protocol.split(':').next().unwrap_or(new_protocol);
    url.set_scheme(new_protocol)
}
/// Getter for https://url.spec.whatwg.org/#dom-url-username
///
/// Delegates to `Url::username`.
#[inline]
pub fn username(url: &Url) -> &str {
    url.username()
}
/// Setter for https://url.spec.whatwg.org/#dom-url-username
///
/// Delegates to `Url::set_username` and returns its result.
pub fn set_username(url: &mut Url, new_username: &str) -> Result<(), ()> {
    url.set_username(new_username)
}
/// Getter for https://url.spec.whatwg.org/#dom-url-password
///
/// Returns the empty string when `Url::password` returns `None`.
#[inline]
pub fn password(url: &Url) -> &str {
    url.password().unwrap_or("")
}
/// Setter for https://url.spec.whatwg.org/#dom-url-password
///
/// An empty `new_password` is passed to `Url::set_password` as `None`.
pub fn set_password(url: &mut Url, new_password: &str) -> Result<(), ()> {
    let password = match new_password {
        "" => None,
        non_empty => Some(non_empty),
    };
    url.set_password(password)
}
/// Getter for https://url.spec.whatwg.org/#dom-url-host
///
/// Returns the slice of the serialization from `Position::BeforeHost` to `Position::AfterPort`.
#[inline]
pub fn host(url: &Url) -> &str {
    &url[Position::BeforeHost..Position::AfterPort]
}
/// Setter for https://url.spec.whatwg.org/#dom-url-host
///
/// Parses `new_host` as a host optionally followed by `:` and a port,
/// then passes both to `Url::set_host_internal`.
///
/// Returns `Err(())` without modifying `url` when `url` cannot be a base
/// or when the host fails to parse.
pub fn set_host(url: &mut Url, new_host: &str) -> Result<(), ()> {
    if url.cannot_be_a_base() {
        return Err(())
    }
    let host;
    let opt_port;
    // `scheme` borrows from `url`; the inner scope presumably exists
    // so that this borrow ends before `url` is mutated below.
    {
        let scheme = url.scheme();
        let result = Parser::parse_host(Input::new(new_host), SchemeType::from(scheme));
        match result {
            Ok((h, remaining)) => {
                host = h;
                // A port is only looked for after a `:`;
                // a port that fails to parse is dropped (`None`) rather than reported.
                opt_port = if let Some(remaining) = remaining.split_prefix(':') {
                    Parser::parse_port(remaining, || default_port(scheme), Context::Setter)
                    .ok().map(|(port, _remaining)| port)
                } else {
                    None
                };
            }
            Err(_) => return Err(())
        }
    }
    url.set_host_internal(host, opt_port);
    Ok(())
}
/// Getter for https://url.spec.whatwg.org/#dom-url-hostname
///
/// Returns the empty string when `Url::host_str` returns `None`.
#[inline]
pub fn hostname(url: &Url) -> &str {
    url.host_str().unwrap_or("")
}
/// Setter for https://url.spec.whatwg.org/#dom-url-hostname
///
/// Returns `Err(())` without modifying `url` when `url` cannot be a base
/// or when `new_hostname` fails to parse as a host.
pub fn set_hostname(url: &mut Url, new_hostname: &str) -> Result<(), ()> {
    if url.cannot_be_a_base() {
        return Err(());
    }
    let parsed = Parser::parse_host(Input::new(new_hostname), SchemeType::from(url.scheme()));
    match parsed {
        // Whatever follows the host in `new_hostname` is ignored.
        Ok((host, _remaining)) => {
            url.set_host_internal(host, None);
            Ok(())
        }
        Err(_) => Err(()),
    }
}
/// Getter for https://url.spec.whatwg.org/#dom-url-port
///
/// Returns the slice of the serialization from `Position::BeforePort` to `Position::AfterPort`.
#[inline]
pub fn port(url: &Url) -> &str {
    &url[Position::BeforePort..Position::AfterPort]
}
/// Setter for https://url.spec.whatwg.org/#dom-url-port
///
/// Returns `Err(())` without modifying `url` when `url` has no host, has the `file` scheme,
/// or when `new_port` fails to parse as a port.
pub fn set_port(url: &mut Url, new_port: &str) -> Result<(), ()> {
    let result;
    // `scheme` borrows from `url`; the inner scope presumably exists
    // so that this borrow ends before `url` is mutated below.
    {
        // has_host implies !cannot_be_a_base
        let scheme = url.scheme();
        if !url.has_host() || scheme == "file" {
            return Err(())
        }
        result = Parser::parse_port(Input::new(new_port), || default_port(scheme), Context::Setter)
    }
    // Whatever follows the port in `new_port` is ignored.
    if let Ok((new_port, _remaining)) = result {
        url.set_port_internal(new_port);
        Ok(())
    } else {
        Err(())
    }
}
/// Getter for https://url.spec.whatwg.org/#dom-url-pathname
///
/// Delegates to `Url::path`.
#[inline]
pub fn pathname(url: &Url) -> &str {
    url.path()
}
/// Setter for https://url.spec.whatwg.org/#dom-url-pathname
///
/// Does nothing when `url` cannot be a base.
pub fn set_pathname(url: &mut Url, new_pathname: &str) {
    if url.cannot_be_a_base() {
        return;
    }
    url.set_path(new_pathname)
}
/// Getter for https://url.spec.whatwg.org/#dom-url-search
///
/// Returns the slice of the serialization from `Position::AfterPath` to `Position::AfterQuery`,
/// or the empty string when that slice is a single byte (a `?` with an empty query).
pub fn search(url: &Url) -> &str {
    trim(&url[Position::AfterPath..Position::AfterQuery])
}
/// Setter for https://url.spec.whatwg.org/#dom-url-search
///
/// An empty `new_search` removes the query; a single leading `?` is stripped before
/// the rest is passed to `Url::set_query`.
pub fn set_search(url: &mut Url, new_search: &str) {
    let query = if new_search.is_empty() {
        None
    } else if new_search.starts_with('?') {
        Some(&new_search[1..])
    } else {
        Some(new_search)
    };
    url.set_query(query)
}
/// Getter for https://url.spec.whatwg.org/#dom-url-hash
///
/// Returns the slice of the serialization from `Position::AfterQuery` to the end,
/// or the empty string when that slice is a single byte (a `#` with an empty fragment).
pub fn hash(url: &Url) -> &str {
    trim(&url[Position::AfterQuery..])
}
/// Setter for https://url.spec.whatwg.org/#dom-url-hash
///
/// Does nothing for `javascript` URLs. Otherwise an empty `new_hash` removes the fragment,
/// and a single leading `#` is stripped before the rest is passed to `Url::set_fragment`.
pub fn set_hash(url: &mut Url, new_hash: &str) {
    if url.scheme() == "javascript" {
        return;
    }
    let fragment = if new_hash.is_empty() {
        None
    } else if new_hash.starts_with('#') {
        Some(&new_hash[1..])
    } else {
        Some(new_hash)
    };
    url.set_fragment(fragment)
}
/// Map a one-byte string (a lone `?` or `#` delimiter in this module's callers)
/// to the empty string; return any other string unchanged.
fn trim(s: &str) -> &str {
    match s.len() {
        1 => "",
        _ => s,
    }
}

View File

@ -1,182 +0,0 @@
// Copyright 2016 The rust-url developers.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use std::ops::{Range, RangeFrom, RangeTo, RangeFull, Index};
use Url;
/// `&url[..]` is the whole serialization of the URL.
impl Index<RangeFull> for Url {
    type Output = str;
    fn index(&self, _: RangeFull) -> &str {
        &self.serialization
    }
}
/// `&url[position..]` is the serialization from `position` to the end of the URL.
impl Index<RangeFrom<Position>> for Url {
    type Output = str;

    fn index(&self, range: RangeFrom<Position>) -> &str {
        // The inherent `Url::index` converts a `Position` to a byte offset.
        let start = self.index(range.start);
        &self.serialization[start..]
    }
}
/// `&url[..position]` is the serialization from the start of the URL up to `position`.
impl Index<RangeTo<Position>> for Url {
    type Output = str;

    fn index(&self, range: RangeTo<Position>) -> &str {
        // The inherent `Url::index` converts a `Position` to a byte offset.
        let end = self.index(range.end);
        &self.serialization[..end]
    }
}
/// `&url[start..end]` is the part of the serialization between two positions.
impl Index<Range<Position>> for Url {
    type Output = str;

    fn index(&self, range: Range<Position>) -> &str {
        // The inherent `Url::index` converts each `Position` to a byte offset.
        let start = self.index(range.start);
        let end = self.index(range.end);
        &self.serialization[start..end]
    }
}
/// Indicates a position within a URL based on its components.
///
/// A range of positions can be used for slicing `Url`:
///
/// ```rust
/// # use url::{Url, Position};
/// # fn something(some_url: Url) {
/// let serialization: &str = &some_url[..];
/// let serialization_without_fragment: &str = &some_url[..Position::AfterQuery];
/// let authority: &str = &some_url[Position::BeforeUsername..Position::AfterPort];
/// let data_url_payload: &str = &some_url[Position::BeforePath..Position::AfterQuery];
/// let scheme_relative: &str = &some_url[Position::BeforeUsername..];
/// # }
/// ```
///
/// In a pseudo-grammar (where `[`…`]?` makes a sub-sequence optional),
/// URL components and delimiters that separate them are:
///
/// ```notrust
/// url =
/// scheme ":"
/// [ "//" [ username [ ":" password ]? "@" ]? host [ ":" port ]? ]?
/// path [ "?" query ]? [ "#" fragment ]?
/// ```
///
/// When a given component is not present,
/// its "before" and "after" position are the same
/// (so that `&some_url[BeforeFoo..AfterFoo]` is the empty string)
/// and component ordering is preserved
/// (so that a missing query "is between" a path and a fragment).
///
/// The end of a component and the start of the next are either the same or separated
/// by a delimiter.
/// (Note that the initial `/` of a path is considered part of the path here, not a delimiter.)
/// For example, `&url[..BeforeFragment]` would include a `#` delimiter (if present in `url`),
/// so `&url[..AfterQuery]` might be desired instead.
///
/// `BeforeScheme` and `AfterFragment` are always the start and end of the entire URL,
/// so `&url[BeforeScheme..X]` is the same as `&url[..X]`
/// and `&url[X..AfterFragment]` is the same as `&url[X..]`.
#[derive(Copy, Clone, Debug)]
pub enum Position {
    BeforeScheme,
    AfterScheme,
    BeforeUsername,
    AfterUsername,
    BeforePassword,
    AfterPassword,
    BeforeHost,
    AfterHost,
    BeforePort,
    AfterPort,
    BeforePath,
    AfterPath,
    BeforeQuery,
    AfterQuery,
    BeforeFragment,
    AfterFragment
}
impl Url {
    /// Convert a `Position` into a byte offset into `self.serialization`.
    ///
    /// The offset is computed from the component boundaries stored in `self`
    /// (`scheme_end`, `username_end`, `host_start`, `host_end`, `path_start`,
    /// `query_start`, `fragment_start`), skipping over a delimiter where the position
    /// is the "before" side of a component that is present.
    #[inline]
    fn index(&self, position: Position) -> usize {
        match position {
            Position::BeforeScheme => 0,
            Position::AfterScheme => self.scheme_end as usize,
            // With an authority the scheme is followed by "://", otherwise by ":" only.
            Position::BeforeUsername => if self.has_authority() {
                self.scheme_end as usize + "://".len()
            } else {
                debug_assert!(self.byte_at(self.scheme_end) == b':');
                debug_assert!(self.scheme_end + ":".len() as u32 == self.username_end);
                self.scheme_end as usize + ":".len()
            },
            Position::AfterUsername => self.username_end as usize,
            // A `:` right after the username marks the presence of a password.
            Position::BeforePassword => if self.has_authority() &&
            self.byte_at(self.username_end) == b':' {
                self.username_end as usize + ":".len()
            } else {
                debug_assert!(self.username_end == self.host_start);
                self.username_end as usize
            },
            // When there is a password, it ends just before the `@` that precedes the host.
            Position::AfterPassword => if self.has_authority() &&
            self.byte_at(self.username_end) == b':' {
                debug_assert!(self.byte_at(self.host_start - "@".len() as u32) == b'@');
                self.host_start as usize - "@".len()
            } else {
                debug_assert!(self.username_end == self.host_start);
                self.host_start as usize
            },
            Position::BeforeHost => self.host_start as usize,
            Position::AfterHost => self.host_end as usize,
            // Skip the `:` delimiter only when a port is stored.
            Position::BeforePort => if self.port.is_some() {
                debug_assert!(self.byte_at(self.host_end) == b':');
                self.host_end as usize + ":".len()
            } else {
                self.host_end as usize
            },
            Position::AfterPort => self.path_start as usize,
            Position::BeforePath => self.path_start as usize,
            // The path ends where the query starts, else where the fragment starts,
            // else at the end of the serialization.
            Position::AfterPath => match (self.query_start, self.fragment_start) {
                (Some(q), _) => q as usize,
                (None, Some(f)) => f as usize,
                (None, None) => self.serialization.len(),
            },
            Position::BeforeQuery => match (self.query_start, self.fragment_start) {
                (Some(q), _) => {
                    debug_assert!(self.byte_at(q) == b'?');
                    q as usize + "?".len()
                }
                (None, Some(f)) => f as usize,
                (None, None) => self.serialization.len(),
            },
            Position::AfterQuery => match self.fragment_start {
                None => self.serialization.len(),
                Some(f) => f as usize,
            },
            Position::BeforeFragment => match self.fragment_start {
                Some(f) => {
                    debug_assert!(self.byte_at(f) == b'#');
                    f as usize + "#".len()
                }
                None => self.serialization.len(),
            },
            Position::AfterFragment => self.serialization.len(),
        }
    }
}

View File

@ -1,193 +0,0 @@
// Copyright 2013-2014 The rust-url developers.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//! Data-driven tests
extern crate rustc_serialize;
extern crate test;
extern crate url;
use rustc_serialize::json::{self, Json};
use url::{Url, quirks};
/// Run one URL parsing test case.
///
/// Parses `base`, resolves `input` against it and, when `expected` is `Ok`,
/// compares every `quirks` getter with the expected attribute of the same name.
/// When `expected` is `Err(())`, resolving `input` must fail.
/// Any mismatch panics.
fn run_parsing(input: String, base: String, expected: Result<ExpectedAttributes, ()>) {
    let base = match Url::parse(&base) {
        Ok(base) => base,
        Err(message) => panic!("Error parsing base {:?}: {}", base, message)
    };
    let (url, expected) = match (base.join(&input), expected) {
        (Ok(url), Ok(expected)) => (url, expected),
        (Err(_), Err(())) => return,
        (Err(message), Ok(_)) => panic!("Error parsing URL {:?}: {}", input, message),
        (Ok(_), Err(())) => panic!("Expected a parse error for URL {:?}", input),
    };
    url.assert_invariants();
    // Local replacement for the standard `assert_eq!` whose failure message
    // also names the expected attribute and the URL under test.
    macro_rules! assert_eq {
        ($expected: expr, $got: expr) => {
            {
                let expected = $expected;
                let got = $got;
                assert!(expected == got, "{:?} != {} {:?} for URL {:?}",
                got, stringify!($expected), expected, url);
            }
        }
    }
    // For each attribute name, compare `expected.<attr>` with `quirks::<attr>(&url)`.
    macro_rules! assert_attributes {
        ($($attr: ident)+) => {
            {
                $(
                assert_eq!(expected.$attr, quirks::$attr(&url));
                )+;
            }
        }
    }
    assert_attributes!(href protocol username password host hostname port pathname search hash);
    // The origin is only checked when the test case provides one.
    if let Some(expected_origin) = expected.origin {
        assert_eq!(expected_origin, quirks::origin(&url));
    }
}
/// Expected values of the `quirks` getters for one successful parsing test case.
///
/// Each field is compared with the `quirks` function of the same name.
struct ExpectedAttributes {
    href: String,
    // Optional: only present for test cases that have an "origin" key.
    origin: Option<String>,
    protocol: String,
    username: String,
    password: String,
    host: String,
    hostname: String,
    port: String,
    pathname: String,
    search: String,
    hash: String,
}
/// Convenience methods for taking test data apart from a `Json` value.
trait JsonExt {
    /// Remove and return the value stored under `key` in a JSON object.
    fn take(&mut self, key: &str) -> Option<Json>;
    /// Unwrap a JSON object.
    fn object(self) -> json::Object;
    /// Unwrap a JSON string.
    fn string(self) -> String;
    /// Remove the value stored under `key` and unwrap it as a string.
    fn take_string(&mut self, key: &str) -> String;
}
impl JsonExt for Json {
    /// Panics if `self` is not a JSON object.
    fn take(&mut self, key: &str) -> Option<Json> {
        let object = self.as_object_mut().unwrap();
        object.remove(key)
    }

    /// Panics if `self` is not a JSON object.
    fn object(self) -> json::Object {
        match self {
            Json::Object(object) => object,
            _ => panic!("Not a Json::Object"),
        }
    }

    /// Panics if `self` is not a JSON string.
    fn string(self) -> String {
        match self {
            Json::String(string) => string,
            _ => panic!("Not a Json::String"),
        }
    }

    /// Panics if `self` is not a JSON object, if `key` is missing,
    /// or if the value is not a JSON string.
    fn take_string(&mut self, key: &str) -> String {
        let value = self.take(key).unwrap();
        value.string()
    }
}
/// Register one dynamic test per entry of `urltestdata.json` through `add_test`.
///
/// Entries with a "failure" key are expected to fail to parse;
/// all other entries carry the expected value of every attribute.
fn collect_parsing<F: FnMut(String, test::TestFn)>(add_test: &mut F) {
    // Copied from https://github.com/w3c/web-platform-tests/blob/master/url/
    let mut json = Json::from_str(include_str!("urltestdata.json"))
    .expect("JSON parse error in urltestdata.json");
    for entry in json.as_array_mut().unwrap() {
        if entry.is_string() {
            continue // ignore comments
        }
        let base = entry.take_string("base");
        let input = entry.take_string("input");
        let expected = if entry.find("failure").is_some() {
            Err(())
        } else {
            Ok(ExpectedAttributes {
                href: entry.take_string("href"),
                origin: entry.take("origin").map(Json::string),
                protocol: entry.take_string("protocol"),
                username: entry.take_string("username"),
                password: entry.take_string("password"),
                host: entry.take_string("host"),
                hostname: entry.take_string("hostname"),
                port: entry.take_string("port"),
                pathname: entry.take_string("pathname"),
                search: entry.take_string("search"),
                hash: entry.take_string("hash"),
            })
        };
        // The test is named after its input and base URL.
        add_test(format!("{:?} @ base {:?}", input, base),
        test::TestFn::dyn_test_fn(move || run_parsing(input, base, expected)));
    }
}
/// Register one dynamic test per entry of `setters_tests.json` through `add_test`.
///
/// Each test parses `href`, applies one `quirks` setter with `new_value`
/// and compares the getters listed under "expected" with their expected values.
fn collect_setters<F>(add_test: &mut F) where F: FnMut(String, test::TestFn) {
    let mut json = Json::from_str(include_str!("setters_tests.json"))
    .expect("JSON parse error in setters_tests.json");
    // Register the tests stored under the JSON key `$attr`, exercising `quirks::$setter`.
    macro_rules! setter {
        ($attr: expr, $setter: ident) => {{
            let mut tests = json.take($attr).unwrap();
            for mut test in tests.as_array_mut().unwrap().drain(..) {
                let comment = test.take("comment").map(Json::string).unwrap_or(String::new());
                let href = test.take_string("href");
                let new_value = test.take_string("new_value");
                let name = format!("{:?}.{} = {:?} {}", href, $attr, new_value, comment);
                let mut expected = test.take("expected").unwrap();
                add_test(name, test::TestFn::dyn_test_fn(move || {
                    let mut url = Url::parse(&href).unwrap();
                    url.assert_invariants();
                    // The setter's own result is ignored; only the resulting URL is checked.
                    let _ = quirks::$setter(&mut url, &new_value);
                    assert_attributes!(url, expected,
                    href protocol username password host hostname port pathname search hash);
                    url.assert_invariants();
                }))
            }
        }}
    }
    // Only the attributes present in `$expected` are compared.
    macro_rules! assert_attributes {
        ($url: expr, $expected: expr, $($attr: ident)+) => {
            $(
            if let Some(value) = $expected.take(stringify!($attr)) {
                assert_eq!(quirks::$attr(&$url), value.string())
            }
            )+
        }
    }
    setter!("protocol", set_protocol);
    setter!("username", set_username);
    setter!("password", set_password);
    setter!("hostname", set_hostname);
    setter!("host", set_host);
    setter!("port", set_port);
    setter!("pathname", set_pathname);
    setter!("search", set_search);
    setter!("hash", set_hash);
}
/// Collect the data-driven parsing and setter tests and hand them to the `test` crate's runner.
fn main() {
    let mut tests = Vec::new();
    {
        // Wrap a name and a test function into a test description and store it in `tests`.
        let mut add_one = |name: String, run: test::TestFn| {
            tests.push(test::TestDescAndFn {
                desc: test::TestDesc {
                    name: test::DynTestName(name),
                    ignore: false,
                    should_panic: test::ShouldPanic::No,
                },
                testfn: run,
            })
        };
        collect_parsing(&mut add_one);
        collect_setters(&mut add_one);
    }
    test::test_main(&std::env::args().collect::<Vec<_>>(), tests)
}

File diff suppressed because it is too large Load Diff

View File

@ -1,303 +0,0 @@
// Copyright 2013-2014 The rust-url developers.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//! Unit tests
extern crate url;
use std::borrow::Cow;
use std::net::{Ipv4Addr, Ipv6Addr};
use std::path::{Path, PathBuf};
use url::{Host, Url, form_urlencoded};
// Assert that a file path converts to a `file` URL without a host and with the given URL path,
// and that the URL converts back to the same file path.
// With a single argument, the URL path is expected to equal the file path.
macro_rules! assert_from_file_path {
    ($path: expr) => { assert_from_file_path!($path, $path) };
    ($path: expr, $url_path: expr) => {{
        let url = Url::from_file_path(Path::new($path)).unwrap();
        assert_eq!(url.host(), None);
        assert_eq!(url.path(), $url_path);
        assert_eq!(url.to_file_path(), Ok(PathBuf::from($path)));
    }};
}
#[test]
fn new_file_paths() {
    // Relative paths cannot be converted to a URL.
    if cfg!(unix) {
        assert_eq!(Url::from_file_path(Path::new("relative")), Err(()));
        assert_eq!(Url::from_file_path(Path::new("../relative")), Err(()));
    }
    if cfg!(windows) {
        assert_eq!(Url::from_file_path(Path::new("relative")), Err(()));
        assert_eq!(Url::from_file_path(Path::new(r"..\relative")), Err(()));
        assert_eq!(Url::from_file_path(Path::new(r"\drive-relative")), Err(()));
        assert_eq!(Url::from_file_path(Path::new(r"\\ucn\")), Err(()));
    }
    // Absolute paths round-trip; NUL and `%` are percent-encoded in the URL path.
    if cfg!(unix) {
        assert_from_file_path!("/foo/bar");
        assert_from_file_path!("/foo/ba\0r", "/foo/ba%00r");
        assert_from_file_path!("/foo/ba%00r", "/foo/ba%2500r");
    }
}
#[test]
#[cfg(unix)]
/// A path that is not valid UTF-8 must round-trip through a `file` URL.
fn new_path_bad_utf8() {
    use std::ffi::OsStr;
    use std::os::unix::prelude::*;
    // 0x80 on its own is not valid UTF-8.
    let os_str = OsStr::from_bytes(b"/foo/ba\x80r");
    let url = Url::from_file_path(Path::new(os_str)).unwrap();
    let round_tripped = url.to_file_path();
    assert_eq!(round_tripped, Ok(PathBuf::from(os_str)));
}
#[test]
fn new_path_windows_fun() {
    // These checks only run when compiled for Windows.
    if cfg!(windows) {
        assert_from_file_path!(r"C:\foo\bar", "/C:/foo/bar");
        assert_from_file_path!("C:\\foo\\ba\0r", "/C:/foo/ba%00r");
        // Invalid UTF-8
        assert!(Url::parse("file:///C:/foo/ba%80r").unwrap().to_file_path().is_err());
        // test windows canonicalized path
        let path = PathBuf::from(r"\\?\C:\foo\bar");
        assert!(Url::from_file_path(path).is_ok());
    }
}
#[test]
fn new_directory_paths() {
    // Relative paths are rejected; for absolute paths the URL path gets a trailing slash.
    if cfg!(unix) {
        assert_eq!(Url::from_directory_path(Path::new("relative")), Err(()));
        assert_eq!(Url::from_directory_path(Path::new("../relative")), Err(()));
        let url = Url::from_directory_path(Path::new("/foo/bar")).unwrap();
        assert_eq!(url.host(), None);
        assert_eq!(url.path(), "/foo/bar/");
    }
    if cfg!(windows) {
        assert_eq!(Url::from_directory_path(Path::new("relative")), Err(()));
        assert_eq!(Url::from_directory_path(Path::new(r"..\relative")), Err(()));
        assert_eq!(Url::from_directory_path(Path::new(r"\drive-relative")), Err(()));
        assert_eq!(Url::from_directory_path(Path::new(r"\\ucn\")), Err(()));
        let url = Url::from_directory_path(Path::new(r"C:\foo\bar")).unwrap();
        assert_eq!(url.host(), None);
        assert_eq!(url.path(), "/C:/foo/bar/");
    }
}
#[test]
/// `Url` can be produced by `str::parse`.
fn from_str() {
    let parsed = "http://testing.com/this".parse::<Url>();
    assert!(parsed.is_ok());
}
#[test]
fn issue_124() {
    // (input, expected path) pairs for `file:` URLs written without slashes.
    let cases = [
        ("file:a", "/a"),
        ("file:...", "/..."),
        ("file:..", "/"),
    ];
    for &(input, expected_path) in cases.iter() {
        let url: Url = input.parse().unwrap();
        assert_eq!(url.path(), expected_path);
    }
}
#[test]
fn test_equality() {
    use std::hash::{Hash, Hasher, SipHasher};
    // Assert that two URLs compare equal and hash to the same value.
    fn check_eq(a: &Url, b: &Url) {
        assert_eq!(a, b);
        let mut h1 = SipHasher::new();
        a.hash(&mut h1);
        let mut h2 = SipHasher::new();
        b.hash(&mut h2);
        assert_eq!(h1.finish(), h2.finish());
    }
    // Parse a URL and check that it is equal to itself.
    fn url(s: &str) -> Url {
        let rv = s.parse().unwrap();
        check_eq(&rv, &rv);
        rv
    }
    // Doesn't care if default port is given.
    let a: Url = url("https://example.com/");
    let b: Url = url("https://example.com:443/");
    check_eq(&a, &b);
    // Different ports
    let a: Url = url("http://example.com/");
    let b: Url = url("http://example.com:8080/");
    assert!(a != b, "{:?} != {:?}", a, b);
    // Different scheme
    let a: Url = url("http://example.com/");
    let b: Url = url("https://example.com/");
    assert!(a != b);
    // Different host
    let a: Url = url("http://foo.com/");
    let b: Url = url("http://bar.com/");
    assert!(a != b);
    // Missing path, automatically substituted. Semantically the same.
    let a: Url = url("http://foo.com");
    let b: Url = url("http://foo.com/");
    check_eq(&a, &b);
}
#[test]
fn host() {
    // Assert that parsing `input` gives a URL with the given host.
    fn assert_host(input: &str, host: Host<&str>) {
        assert_eq!(Url::parse(input).unwrap().host(), Some(host));
    }
    assert_host("http://www.mozilla.org", Host::Domain("www.mozilla.org"));
    assert_host("http://1.35.33.49", Host::Ipv4(Ipv4Addr::new(1, 35, 33, 49)));
    assert_host("http://[2001:0db8:85a3:08d3:1319:8a2e:0370:7344]", Host::Ipv6(Ipv6Addr::new(
    0x2001, 0x0db8, 0x85a3, 0x08d3, 0x1319, 0x8a2e, 0x0370, 0x7344)));
    assert_host("http://1.35.+33.49", Host::Domain("1.35.+33.49"));
    assert_host("http://[::]", Host::Ipv6(Ipv6Addr::new(0, 0, 0, 0, 0, 0, 0, 0)));
    assert_host("http://[::1]", Host::Ipv6(Ipv6Addr::new(0, 0, 0, 0, 0, 0, 0, 1)));
    // Hexadecimal, octal and single-number forms are expected to parse as IPv4 addresses.
    assert_host("http://0x1.0X23.0x21.061", Host::Ipv4(Ipv4Addr::new(1, 35, 33, 49)));
    assert_host("http://0x1232131", Host::Ipv4(Ipv4Addr::new(1, 35, 33, 49)));
    assert_host("http://111", Host::Ipv4(Ipv4Addr::new(0, 0, 0, 111)));
    assert_host("http://2..2.3", Host::Domain("2..2.3"));
    // These hosts are expected to be rejected.
    assert!(Url::parse("http://42.0x1232131").is_err());
    assert!(Url::parse("http://192.168.0.257").is_err());
}
#[test]
fn host_serialization() {
    // libstd's `Display for Ipv6Addr` serializes 0:0:0:0:0:0:_:_ and 0:0:0:0:0:ffff:_:_
    // using IPv4-like syntax, as suggested in https://tools.ietf.org/html/rfc5952#section-4
    // but https://url.spec.whatwg.org/#concept-ipv6-serializer specifies not to.
    // Not [::0.0.0.2] / [::ffff:0.0.0.2]
    assert_eq!(Url::parse("http://[0::2]").unwrap().host_str(), Some("[::2]"));
    assert_eq!(Url::parse("http://[0::ffff:0:2]").unwrap().host_str(), Some("[::ffff:0:2]"));
}
#[test]
fn test_idna() {
    // A non-ASCII domain parses successfully...
    let non_ascii = "http://goșu.ro".parse::<Url>();
    assert!(non_ascii.is_ok());
    // ...and the host is exposed in its ASCII (`xn--`) form.
    let snowman = Url::parse("http://☃.net/").unwrap();
    assert_eq!(snowman.host(), Some(Host::Domain("xn--n3h.net")));
}
#[test]
fn test_serialization() {
    // (input, expected serialization) pairs
    let cases = [
        ("http://example.com/", "http://example.com/"),
        ("http://addslash.com", "http://addslash.com/"),
        ("http://@emptyuser.com/", "http://emptyuser.com/"),
        ("http://:@emptypass.com/", "http://:@emptypass.com/"),
        ("http://user@user.com/", "http://user@user.com/"),
        ("http://user:pass@userpass.com/", "http://user:pass@userpass.com/"),
        ("http://slashquery.com/path/?q=something", "http://slashquery.com/path/?q=something"),
        ("http://noslashquery.com/path?q=something", "http://noslashquery.com/path?q=something"),
    ];
    for case in cases.iter() {
        let (input, expected) = *case;
        let url = Url::parse(input).unwrap();
        assert_eq!(url.as_str(), expected);
    }
}
#[test]
fn test_form_urlencoded() {
    let pairs: &[(Cow<str>, Cow<str>)] = &[
        ("foo".into(), "é&".into()),
        ("bar".into(), "".into()),
        ("foo".into(), "#".into())
    ];
    let encoded = form_urlencoded::Serializer::new(String::new()).extend_pairs(pairs).finish();
    assert_eq!(encoded, "foo=%C3%A9%26&bar=&foo=%23");
    // Parsing the serialization gives back the original pairs.
    assert_eq!(form_urlencoded::parse(encoded.as_bytes()).collect::<Vec<_>>(), pairs.to_vec());
}
#[test]
fn test_form_serialize() {
    // Append the pairs one statement at a time instead of chaining the calls.
    let mut serializer = form_urlencoded::Serializer::new(String::new());
    serializer.append_pair("foo", "é&");
    serializer.append_pair("bar", "");
    serializer.append_pair("foo", "#");
    let encoded = serializer.finish();
    assert_eq!(encoded, "foo=%C3%A9%26&bar=&foo=%23");
}
#[test]
/// https://github.com/servo/rust-url/issues/25
fn issue_25() {
    let filename = if cfg!(windows) { r"C:\run\pg.sock" } else { "/run/pg.sock" };
    let mut url = Url::from_file_path(filename).unwrap();
    // The invariants must hold after every mutation.
    url.assert_invariants();
    url.set_scheme("postgres").unwrap();
    url.assert_invariants();
    url.set_host(Some("")).unwrap();
    url.assert_invariants();
    url.set_username("me").unwrap();
    url.assert_invariants();
    let expected = format!("postgres://me@/{}run/pg.sock", if cfg!(windows) { "C:/" } else { "" });
    assert_eq!(url.as_str(), expected);
}
#[test]
/// https://github.com/servo/rust-url/issues/61
fn issue_61() {
    let mut url = Url::parse("http://mozilla.org").unwrap();
    url.set_scheme("https").unwrap();
    // After switching scheme there is still no explicit port,
    // and the known default is the one of the new scheme.
    assert_eq!(url.port(), None);
    assert_eq!(url.port_or_known_default(), Some(443));
    url.assert_invariants();
}
#[test]
#[cfg(not(windows))]
/// https://github.com/servo/rust-url/issues/197
fn issue_197() {
    let mut url = Url::from_file_path("/").expect("Failed to parse path");
    url.assert_invariants();
    assert_eq!(url, Url::parse("file:///").expect("Failed to parse path + protocol"));
    // This call only has to complete without panicking; its result is not inspected.
    url.path_segments_mut().expect("path_segments_mut").pop_if_empty();
}
#[test]
/// https://github.com/servo/rust-url/issues/222
fn append_trailing_slash() {
    let mut url: Url = "http://localhost:6767/foo/bar?a=b".parse().unwrap();
    url.assert_invariants();
    // Pushing an empty segment adds a trailing slash to the path and keeps the query.
    url.path_segments_mut().unwrap().push("");
    url.assert_invariants();
    assert_eq!(url.to_string(), "http://localhost:6767/foo/bar/?a=b");
}
#[test]
/// https://github.com/servo/rust-url/issues/227
fn extend_query_pairs_then_mutate() {
    let mut url: Url = "http://localhost:6767/foo/bar".parse().unwrap();
    url.query_pairs_mut().extend_pairs(vec![ ("auth", "my-token") ].into_iter());
    url.assert_invariants();
    assert_eq!(url.to_string(), "http://localhost:6767/foo/bar?auth=my-token");
    // Mutating the path afterwards must keep the query in place.
    url.path_segments_mut().unwrap().push("some_other_path");
    url.assert_invariants();
    assert_eq!(url.to_string(), "http://localhost:6767/foo/bar/some_other_path?auth=my-token");
}
#[test]
/// https://github.com/servo/rust-url/issues/222
fn append_empty_segment_then_mutate() {
    let mut url: Url = "http://localhost:6767/foo/bar?a=b".parse().unwrap();
    url.assert_invariants();
    // Pushing an empty segment and popping it again must leave the URL unchanged.
    url.path_segments_mut().unwrap().push("").pop();
    url.assert_invariants();
    assert_eq!(url.to_string(), "http://localhost:6767/foo/bar?a=b");
}

File diff suppressed because it is too large Load Diff

View File

@ -18,29 +18,8 @@ version = "0.1.0"
dependencies = [
"mp4parse_capi 0.5.1",
"nsstring 0.1.0",
"rust_url_capi 0.0.1",
]
[[package]]
name = "idna"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"matches 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
"unicode-bidi 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
"unicode-normalization 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "libc"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "matches"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "mp4parse"
version = "0.5.1"
@ -70,41 +49,5 @@ dependencies = [
"nsstring 0.1.0",
]
[[package]]
name = "rust_url_capi"
version = "0.0.1"
dependencies = [
"libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)",
"url 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "unicode-bidi"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"matches 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "unicode-normalization"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "url"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"idna 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"matches 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[metadata]
"checksum byteorder 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "0fc10e8cc6b2580fda3f36eb6dc5316657f812a3df879a44a66fc9f0fdbc4855"
"checksum idna 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1053236e00ce4f668aeca4a769a09b3bf5a682d802abd6f3cb39374f6b162c11"
"checksum libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)" = "408014cace30ee0f767b1c4517980646a573ec61a57957aeeabcac8ac0a02e8d"
"checksum matches 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "bcc3ad8109fa4b522f9b0cd81440422781f564aaf8c195de6b9d6642177ad0dd"
"checksum unicode-bidi 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "c1f7ceb96afdfeedee42bade65a0d585a6a0106f681b6749c8ff4daa8df30b3f"
"checksum unicode-normalization 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "26643a2f83bac55f1976fb716c10234485f9202dcd65cfbdf9da49867b271172"
"checksum url 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "8527c62d9869a08325c38272b3f85668df22a65890c61a639d233dc0ed0b23a2"

View File

@ -16,29 +16,8 @@ version = "0.1.0"
dependencies = [
"mp4parse_capi 0.5.1",
"nsstring 0.1.0",
"rust_url_capi 0.0.1",
]
[[package]]
name = "idna"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"matches 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
"unicode-bidi 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
"unicode-normalization 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "libc"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "matches"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "mp4parse"
version = "0.5.1"
@ -57,41 +36,5 @@ dependencies = [
name = "nsstring"
version = "0.1.0"
[[package]]
name = "rust_url_capi"
version = "0.0.1"
dependencies = [
"libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)",
"url 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "unicode-bidi"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"matches 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "unicode-normalization"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "url"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"idna 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"matches 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[metadata]
"checksum byteorder 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "0fc10e8cc6b2580fda3f36eb6dc5316657f812a3df879a44a66fc9f0fdbc4855"
"checksum idna 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1053236e00ce4f668aeca4a769a09b3bf5a682d802abd6f3cb39374f6b162c11"
"checksum libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)" = "408014cace30ee0f767b1c4517980646a573ec61a57957aeeabcac8ac0a02e8d"
"checksum matches 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "bcc3ad8109fa4b522f9b0cd81440422781f564aaf8c195de6b9d6642177ad0dd"
"checksum unicode-bidi 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "c1f7ceb96afdfeedee42bade65a0d585a6a0106f681b6749c8ff4daa8df30b3f"
"checksum unicode-normalization 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "26643a2f83bac55f1976fb716c10234485f9202dcd65cfbdf9da49867b271172"
"checksum url 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "8527c62d9869a08325c38272b3f85668df22a65890c61a639d233dc0ed0b23a2"

View File

@ -8,7 +8,6 @@ description = "Shared Rust code for libxul"
[dependencies]
mp4parse_capi = { path = "../../../../media/libstagefright/binding/mp4parse_capi" }
nsstring = { path = "../../../../xpcom/rust/nsstring" }
rust_url_capi = { path = "../../../../netwerk/base/rust-url-capi" }
[lib]
path = "lib.rs"

View File

@ -4,4 +4,3 @@
extern crate mp4parse_capi;
extern crate nsstring;
extern crate rust_url_capi;