Bug 1908759 - Don't use UTS 46 ToUnicode from nsDocShell::AttemptURIFixup. r=necko-reviewers,jesup,valentin

Differential Revision: https://phabricator.services.mozilla.com/D217009
This commit is contained in:
Henri Sivonen 2024-08-14 18:30:08 +00:00
parent 19e28ccc4f
commit 4496b3ed9b
5 changed files with 39 additions and 48 deletions

View File

@ -5992,29 +5992,16 @@ already_AddRefed<nsIURI> nsDocShell::AttemptURIFixup(
//
// If this string was passed through nsStandardURL by
// chance, then it may have been converted from UTF-8 to
// ACE, which would result in a completely bogus keyword
// Punycode, which would result in a completely bogus keyword
// query. Here we try to recover the original Unicode
// value, but this is not 100% correct since the value may
// have been normalized per the IDN normalization rules.
//
// Since we don't have access to the exact original string
// that was entered by the user, this will just have to do.
//
// XXX: Since we are not trying to use the result as an
// actual domain name, label-wise Punycode decode would
// likely be more appropriate than the full ToUnicode
// operation.
bool isACE;
nsAutoCString utf8Host;
nsCOMPtr<nsIIDNService> idnSrv =
do_GetService(NS_IDNSERVICE_CONTRACTID);
if (idnSrv && NS_SUCCEEDED(idnSrv->IsACE(host, &isACE)) && isACE &&
NS_SUCCEEDED(idnSrv->ConvertACEtoUTF8(host, utf8Host))) {
info = KeywordToURI(utf8Host, aUsePrivateBrowsing);
} else {
info = KeywordToURI(host, aUsePrivateBrowsing);
}
mozilla_net_recover_keyword_from_punycode(&host, &utf8Host);
info = KeywordToURI(utf8Host, aUsePrivateBrowsing);
}
if (info) {
info->GetPreferredURI(getter_AddRefs(newURI));

View File

@ -228,3 +228,34 @@ fn process<OutputUnicode: FnMut(&[char], &[char], bool) -> bool>(
dst.assign(src);
nserror::NS_OK
}
/// Not general-purpose! Only to be used from `nsDocShell::AttemptURIFixup`.
///
/// Splits `src` on `.` and appends every label to `dst`, re-joining the
/// labels with `.`. A label that starts with the lower-case `xn--` prefix is
/// replaced by its Punycode-decoded form when decoding succeeds. Every other
/// label, including one that fails to decode, is appended unchanged.
///
/// The result is appended to `dst`; whatever `dst` already holds is kept.
///
/// # Safety
///
/// `src` must point to a valid `nsACString`, `dst` must point to a valid
/// `nsACString` that may be mutated for the duration of the call, and the two
/// must not refer to the same string.
#[no_mangle]
pub unsafe extern "C" fn mozilla_net_recover_keyword_from_punycode(
    src: *const nsACString,
    dst: *mut nsACString,
) {
    // SAFETY: The caller guarantees that both pointers are valid and that
    // they do not alias (see the `# Safety` section above).
    let source = &*src;
    let sink = &mut *dst;
    for (index, label) in source.split(|b| *b == b'.').enumerate() {
        if index > 0 {
            sink.append(".");
        }
        // Only the lower-case prefix is recognized: that is the form our own
        // IDNA conversion code produces.
        //
        // The remainder of the label is expected to be ASCII, but this is an
        // `extern "C"` entry point, so a panic here must be avoided. A label
        // that is not valid UTF-8 is treated like any other label that
        // cannot be decoded and is passed through as-is.
        let decoded = label
            .strip_prefix(b"xn--")
            .and_then(|punycode| std::str::from_utf8(punycode).ok())
            .and_then(|ascii| idna::punycode::decode_to_string(ascii));
        match decoded {
            Some(text) => sink.append(&text),
            None => sink.append(label),
        }
    }
}

View File

@ -183,32 +183,6 @@ NS_IMETHODIMP nsIDNService::ConvertACEtoUTF8(const nsACString& input,
return NS_DomainToUnicodeAllowAnyGlyphfulASCII(input, _retval);
}
// Sets *_retval to true when `input` carries the ACE prefix "xn--" at the
// start of the string or right after a '.', compared case-insensitively.
// Example of a match: "www.xn--ENCODED.com". Always returns NS_OK.
NS_IMETHODIMP nsIDNService::IsACE(const nsACString& input, bool* _retval) {
  // Non-ASCII input is never reported as ACE.
  if (!IsAscii(input)) {
    *_retval = false;
    return NS_OK;
  }

  // Prefix on the very first segment.
  if (StringBeginsWith(input, "xn--"_ns, nsCaseInsensitiveCStringComparator)) {
    *_retval = true;
    return NS_OK;
  }

  // Empty input, or input that begins with '.', is not searched any further.
  if (input.IsEmpty() || input[0] == '.') {
    *_retval = false;
    return NS_OK;
  }

  // Prefix at the start of any later segment.
  const auto marker = ".xn--"_ns;
  auto equalIgnoringCase = [](unsigned char lhs, unsigned char rhs) {
    return tolower(lhs) == tolower(rhs);
  };
  const auto inputEnd = input.EndReading();
  const auto hit =
      std::search(input.BeginReading(), inputEnd, marker.BeginReading(),
                  marker.EndReading(), equalIgnoringCase);
  *_retval = hit != inputEnd;
  return NS_OK;
}
NS_IMETHODIMP nsIDNService::DomainToDisplay(const nsACString& input,
nsACString& _retval) {
nsresult rv = NS_DomainToDisplay(input, _retval);

View File

@ -77,9 +77,4 @@ interface nsIIDNService : nsISupports
* off using the `idna` crate directly.
*/
AUTF8String convertACEtoUTF8(in ACString input);
/**
* DO NOT USE! Checks if the input string is ACE encoded or not.
*/
boolean isACE(in ACString input);
};

View File

@ -7,6 +7,10 @@ const idnService = Cc["@mozilla.org/network/idn-service;1"].getService(
);
add_task(async function test_simple() {
// Reports whether `domain` starts with "xn--" or contains ".xn--" anywhere.
// The comparison is case-sensitive.
function isACE(domain) {
  if (domain.startsWith("xn--")) {
    return true;
  }
  return domain.includes(".xn--");
}
let reference = [
// The 3rd element indicates whether the second element
// is ACE-encoded
@ -23,6 +27,6 @@ add_task(async function test_simple() {
Assert.equal(idnService.convertUTF8toACE(reference[i][0]), reference[i][1]);
Assert.equal(idnService.convertUTF8toACE(reference[i][1]), reference[i][1]);
Assert.equal(idnService.convertACEtoUTF8(reference[i][1]), reference[i][0]);
Assert.equal(idnService.isACE(reference[i][1]), reference[i][2]);
Assert.equal(isACE(reference[i][1]), reference[i][2]);
}
});