Bug 1403802: Port nsHttpHandler::PrepareAcceptLanguages over to Rust; r=valentin,froydnj

This commit is contained in:
Jeff Hemphill 2017-10-22 11:27:59 -07:00
parent 8e5cbb6713
commit e74e880a6c
9 changed files with 192 additions and 117 deletions

View File

@ -0,0 +1,8 @@
[package]
name = "netwerk_helper"
version = "0.0.1"
authors = ["Jeff Hemphill <jthemphill@mozilla.com>"]
[dependencies]
nserror = { path = "../../../xpcom/rust/nserror" }
nsstring = { path = "../../../xpcom/rust/nsstring" }

View File

@ -0,0 +1,14 @@
#ifndef RUST_NS_NET_HELPER
#define RUST_NS_NET_HELPER
#include "nsError.h"
#include "nsString.h"
// FFI entry point implemented in Rust (netwerk_helper crate) and exported
// with C linkage.
//
// Splits i_accept_languages on ',', strips surrounding whitespace and any
// ";q=..." suffix from each entry, normalizes the case of each language tag,
// and appends the resulting list (with freshly computed q-values) to
// o_accept_languages.
//
// Both pointers must be non-null: the Rust side receives them as references.
// The output string is appended to, not replaced, and is left untouched when
// the input is empty. The Rust implementation returns NS_OK on every path.
extern "C" {
nsresult
rust_prepare_accept_languages(const nsACString* i_accept_languages,
nsACString* o_accept_languages);
}
#endif // RUST_NS_NET_HELPER

View File

@ -0,0 +1,143 @@
use std::ascii::AsciiExt;
extern crate nserror;
use self::nserror::*;
extern crate nsstring;
use self::nsstring::nsACString;
/// HTTP linear whitespace (space and horizontal tab), mirroring `HTTP_LWS`
/// in netwerk/protocol/http/nsHttp.h.
static HTTP_LWS: &'static [u8] = b" \t";

/// Extracts the bare language tag from one comma-separated list entry.
///
/// Leading whitespace is skipped, and the result ends just before the first
/// following whitespace byte or `;` (which introduces a quality value such
/// as `;q=0.8`). Anything after that point is discarded, so `b"en US"`
/// yields `b"en"`. The returned slice borrows from `token` and is empty when
/// the entry holds no tag.
fn trim_token(token: &[u8]) -> &[u8] {
    // Index of the first non-whitespace byte, or the end if there is none.
    let start = token
        .iter()
        .position(|byte| !HTTP_LWS.contains(byte))
        .unwrap_or(token.len());
    let rest = &token[start..];

    // The tag stops at the first whitespace byte or at the start of "; q=...".
    let end = rest
        .iter()
        .position(|byte| *byte == b';' || HTTP_LWS.contains(byte))
        .unwrap_or(rest.len());

    &rest[..end]
}
#[no_mangle]
#[allow(non_snake_case)]
/// Allocates an nsACString that contains a ISO 639 language list
/// notated with HTTP "q" values for output with an HTTP Accept-Language
/// header. Previous q values will be stripped because the order of
/// the langs implies the q value. The q values are calculated by dividing
/// 1.0 amongst the number of languages present.
///
/// Ex: passing: "en, ja"
/// returns: "en,ja;q=0.5"
///
/// passing: "en, ja, fr_CA"
/// returns: "en,ja;q=0.7,fr_CA;q=0.3"
pub extern "C" fn rust_prepare_accept_languages<'a, 'b>(i_accept_languages: &'a nsACString,
o_accept_languages: &'b mut nsACString)
-> nsresult {
if i_accept_languages.is_empty() {
return NS_OK;
}
let make_tokens = || {
i_accept_languages.split(|c| *c == (',' as u8))
.map(|token| trim_token(token))
.filter(|token| token.len() != 0)
};
let n = make_tokens().count();
for (count_n, i_token) in make_tokens().enumerate() {
// delimiter if not first item
if count_n != 0 {
o_accept_languages.append(",");
}
let token_pos = o_accept_languages.len();
o_accept_languages.append(&i_token as &[u8]);
{
let o_token = o_accept_languages.to_mut();
canonicalize_language_tag(&mut o_token[token_pos..]);
}
// Divide the quality-values evenly among the languages.
let q = 1.0 - count_n as f32 / n as f32;
let u: u32 = ((q + 0.005) * 100.0) as u32;
// Only display q-value if less than 1.00.
if u < 100 {
// With a small number of languages, one decimal place is
// enough to prevent duplicate q-values.
// Also, trailing zeroes do not add any information, so
// they can be removed.
if n < 10 || u % 10 == 0 {
let u = (u + 5) / 10;
o_accept_languages.append(&format!(";q=0.{}", u));
} else {
// Values below 10 require zero padding.
o_accept_languages.append(&format!(";q=0.{:02}", u));
}
}
}
NS_OK
}
/// Rewrites a language tag in place so that its capitalization follows the
/// usual conventions.
///
/// # Arguments
/// * `token` - the bytes of a single language tag, e.g.
///   "*", "fr", "en-US", "es-419", "az-Arab", "x-pig-latin", "man-Nkoo-GN"
///
/// The whole tag is lowercased first. Then, for each `-`-separated subtag
/// after the first one:
/// * a two-byte subtag (region, like "US") is uppercased,
/// * a four-byte subtag (script, like "Nkoo") gets an uppercase first byte,
/// * a one-byte subtag (singleton, like "x") ends processing, leaving the
///   remainder lowercase.
///
/// [rfc5646](https://tools.ietf.org/html/rfc5646) states that language tags
/// are case insensitive and that capitalization MUST NOT be taken to carry
/// meaning, so this is purely cosmetic; see bug 1108183 for why it exists.
fn canonicalize_language_tag(token: &mut [u8]) {
    token.make_ascii_lowercase();

    // The first subtag is the primary language and always stays lowercase.
    for sub_tag in token.split_mut(|c| *c == b'-').skip(1) {
        match sub_tag.len() {
            // Singleton: everything after it is private-use or an
            // extension, so nothing further is capitalized.
            1 => return,
            // ISO 3166-1 country code, like "US".
            2 => sub_tag.make_ascii_uppercase(),
            // ISO 15924 script code, like "Nkoo".
            4 => sub_tag[0] = sub_tag[0].to_ascii_uppercase(),
            _ => {}
        }
    }
}

View File

@ -9,6 +9,7 @@
#include "prsystem.h"
#include "nsError.h"
#include "nsHttp.h"
#include "nsHttpHandler.h"
#include "nsHttpChannel.h"
@ -58,6 +59,7 @@
#include "nsIXULRuntime.h"
#include "nsCharSeparatedTokenizer.h"
#include "nsRFPService.h"
#include "rust-helper/src/helper.h"
#include "mozilla/net/NeckoChild.h"
#include "mozilla/net/NeckoParent.h"
@ -1908,51 +1910,6 @@ nsHttpHandler::PrefsChanged(nsIPrefBranch *prefs, const char *pref)
#undef MULTI_PREF_CHANGED
}
/**
 * Normalizes the letter case of a language tag in place.
 *
 * The whole tag is lowercased. Then each '-'-separated subtag after the
 * first is adjusted: two-character subtags are uppercased, four-character
 * subtags get an uppercase first character. Once a one-character
 * (singleton) subtag has been seen, the rest of the tag is left lowercase.
 */
static void
CanonicalizeLanguageTag(char *languageTag)
{
    // Pass 1: lowercase everything.
    for (char *c = languageTag; *c != '\0'; ++c) {
        *c = nsCRT::ToLower(*c);
    }

    // Pass 2: walk the subtags and re-capitalize where convention asks for it.
    bool pastFirstSubTag = false;
    bool sawSingleton = false;
    char *cursor = languageTag;
    while (*cursor != '\0') {
        // A subtag ends at the next '-' or at the terminating NUL.
        char *end = strchr(cursor, '-');
        if (!end) {
            end = strchr(cursor, '\0');
        }

        // The first subtag and everything after a singleton stay lowercase.
        if (pastFirstSubTag && !sawSingleton) {
            size_t length = end - cursor;
            switch (length) {
              case 1:
                sawSingleton = true;
                break;
              case 2:
                cursor[0] = nsCRT::ToUpper(cursor[0]);
                cursor[1] = nsCRT::ToUpper(cursor[1]);
                break;
              case 4:
                cursor[0] = nsCRT::ToUpper(cursor[0]);
                break;
              default:
                break;
            }
        }
        pastFirstSubTag = true;

        // Step over the '-' separator unless the end of the tag was reached.
        cursor = (*end != '\0') ? end + 1 : end;
    }
}
/**
* Allocates a C string that contains an ISO 639 language list
* notated with HTTP "q" values for output with an HTTP Accept-Language
@ -1972,78 +1929,9 @@ PrepareAcceptLanguages(const char *i_AcceptLanguages, nsACString &o_AcceptLangua
if (!i_AcceptLanguages)
return NS_OK;
uint32_t n, count_n, size, wrote;
double q, dec;
char *p, *p2, *token, *q_Accept, *o_Accept;
const char *comma;
int32_t available;
o_Accept = strdup(i_AcceptLanguages);
if (!o_Accept)
return NS_ERROR_OUT_OF_MEMORY;
for (p = o_Accept, n = size = 0; '\0' != *p; p++) {
if (*p == ',') n++;
size++;
}
available = size + ++n * 11 + 1;
q_Accept = new char[available];
if (!q_Accept) {
free(o_Accept);
return NS_ERROR_OUT_OF_MEMORY;
}
*q_Accept = '\0';
q = 1.0;
dec = q / (double) n;
count_n = 0;
p2 = q_Accept;
for (token = nsCRT::strtok(o_Accept, ",", &p);
token != nullptr;
token = nsCRT::strtok(p, ",", &p))
{
token = net_FindCharNotInSet(token, HTTP_LWS);
char* trim;
trim = net_FindCharInSet(token, ";" HTTP_LWS);
if (trim != nullptr) // remove "; q=..." if present
*trim = '\0';
if (*token != '\0') {
CanonicalizeLanguageTag(token);
comma = count_n++ != 0 ? "," : ""; // delimiter if not first item
uint32_t u = QVAL_TO_UINT(q);
// Only display q-value if less than 1.00.
if (u < 100) {
const char *qval_str;
// With a small number of languages, one decimal place is enough to prevent duplicate q-values.
// Also, trailing zeroes do not add any information, so they can be removed.
if ((n < 10) || ((u % 10) == 0)) {
u = (u + 5) / 10;
qval_str = "%s%s;q=0.%u";
} else {
// Values below 10 require zero padding.
qval_str = "%s%s;q=0.%02u";
}
wrote = snprintf(p2, available, qval_str, comma, token, u);
} else {
wrote = snprintf(p2, available, "%s%s", comma, token);
}
q -= dec;
p2 += wrote;
available -= wrote;
MOZ_ASSERT(available > 0, "allocated string not long enough");
}
}
free(o_Accept);
o_AcceptLanguages.Assign((const char *) q_Accept);
delete [] q_Accept;
return NS_OK;
const nsAutoCString ns_accept_languages(i_AcceptLanguages);
return rust_prepare_accept_languages(&ns_accept_languages,
&o_AcceptLanguages);
}
nsresult

View File

@ -22,6 +22,8 @@ function run_test() {
["de,en-us,en", "de,en-US;q=0.7,en;q=0.3"],
["en-US, en", "en-US,en;q=0.5"],
["EN-US;q=0.2, EN", "en-US,en;q=0.5"],
["en ;q=0.8, de ", "en,de;q=0.5"],
[",en,", "en"],
];
for (let i = 0; i < testData.length; i++) {

View File

@ -563,6 +563,7 @@ dependencies = [
"geckoservo 0.0.1",
"log 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)",
"mp4parse_capi 0.8.0",
"netwerk_helper 0.0.1",
"nserror 0.1.0",
"nsstring 0.1.0",
"rust_url_capi 0.0.1",
@ -849,6 +850,14 @@ dependencies = [
"ws2_32-sys 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "netwerk_helper"
version = "0.0.1"
dependencies = [
"nserror 0.1.0",
"nsstring 0.1.0",
]
[[package]]
name = "nodrop"
version = "0.1.9"

View File

@ -562,6 +562,7 @@ dependencies = [
"geckoservo 0.0.1",
"log 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)",
"mp4parse_capi 0.8.0",
"netwerk_helper 0.0.1",
"nserror 0.1.0",
"nsstring 0.1.0",
"rust_url_capi 0.0.1",
@ -844,6 +845,14 @@ dependencies = [
"ws2_32-sys 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "netwerk_helper"
version = "0.0.1"
dependencies = [
"nserror 0.1.0",
"nsstring 0.1.0",
]
[[package]]
name = "nodrop"
version = "0.1.9"

View File

@ -10,6 +10,7 @@ geckoservo = { path = "../../../../servo/ports/geckolib", optional = true }
mp4parse_capi = { path = "../../../../media/libstagefright/binding/mp4parse_capi" }
nsstring = { path = "../../../../xpcom/rust/nsstring" }
nserror = { path = "../../../../xpcom/rust/nserror" }
netwerk_helper = { path = "../../../../netwerk/base/rust-helper" }
rust_url_capi = { path = "../../../../netwerk/base/rust-url-capi" }
webrender_bindings = { path = "../../../../gfx/webrender_bindings", optional = true }
cubeb-pulse = { path = "../../../../media/libcubeb/cubeb-pulse-rs", optional = true, features=["pulse-dlopen"] }

View File

@ -9,6 +9,7 @@ extern crate mp4parse_capi;
extern crate nsstring;
extern crate nserror;
extern crate rust_url_capi;
extern crate netwerk_helper;
#[cfg(feature = "quantum_render")]
extern crate webrender_bindings;
#[cfg(feature = "cubeb_pulse_rust")]