Bug 1492938 - Attempt compiling PAC scripts as UTF-8 first if they're valid UTF-8, and only if that fails inflate to UTF-16 and compile that. r=bagder

--HG--
extra : rebase_source : a8933485f054292222bfbba4774e3fa41be1f8c3
This commit is contained in:
Jeff Walden 2018-12-04 14:40:45 -05:00
parent 0914899c29
commit ad17bf59a4
4 changed files with 105 additions and 24 deletions

View File

@ -16,9 +16,12 @@
#include "nsJSUtils.h"
#include "jsfriendapi.h"
#include "js/CompilationAndEvaluation.h"
#include "js/SourceText.h"
#include "js/Utility.h"
#include "prnetdb.h"
#include "nsITimer.h"
#include "mozilla/net/DNS.h"
#include "mozilla/Utf8.h"
#include "nsServiceManagerUtils.h"
#include "nsNetCID.h"
@ -30,8 +33,10 @@ namespace net {
// evaluations. Additionally dnsResolve(host) and myIpAddress() are supplied in
// the same context but are implemented as c++ helpers. alert(msg) is similarly
// defined.
//
// Per ProxyAutoConfig::Init, this data must be ASCII.
static const char *sPacUtils =
static const char sAsciiPacUtils[] =
"function dnsDomainIs(host, domain) {\n"
" return (host.length >= domain.length &&\n"
" host.substring(host.length - domain.length) == domain);\n"
@ -655,14 +660,28 @@ void ProxyAutoConfig::SetThreadLocalIndex(uint32_t index) {
}
nsresult ProxyAutoConfig::Init(const nsCString &aPACURI,
const nsCString &aPACScript, bool aIncludePath,
uint32_t aExtraHeapSize,
const nsCString &aPACScriptData,
bool aIncludePath, uint32_t aExtraHeapSize,
nsIEventTarget *aEventTarget) {
mShutdown = false; // Shutdown needs to be called prior to destruction
mPACURI = aPACURI;
mPACScript = sPacUtils;
mPACScript.Append(aPACScript);
// The full PAC script data is the concatenation of 1) the various functions
// exposed to PAC scripts in |sAsciiPacUtils| and 2) the user-provided PAC
// script data. Historically this was single-byte Latin-1 text (usually just
// ASCII, but bug 296163 has a real-world Latin-1 example). We now support
// UTF-8 if the full data validates as UTF-8, before falling back to Latin-1.
// (Technically this is a breaking change: intentional Latin-1 scripts that
// happen to be valid UTF-8 may have different behavior. We assume such cases
// are vanishingly rare.)
//
// Supporting both UTF-8 and Latin-1 requires that the functions exposed to
// PAC scripts be both UTF-8- and Latin-1-compatible: that is, they must be
// ASCII.
mConcatenatedPACData = sAsciiPacUtils;
mConcatenatedPACData.Append(aPACScriptData);
mIncludePath = aIncludePath;
mExtraHeapSize = aExtraHeapSize;
mMainThreadEventTarget = aEventTarget;
@ -680,7 +699,7 @@ nsresult ProxyAutoConfig::SetupJS() {
delete mJSContext;
mJSContext = nullptr;
if (mPACScript.IsEmpty()) return NS_ERROR_FAILURE;
if (mConcatenatedPACData.IsEmpty()) return NS_ERROR_FAILURE;
NS_GetCurrentThread()->SetCanInvokeJS(true);
@ -706,8 +725,25 @@ nsresult ProxyAutoConfig::SetupJS() {
JS::CompileOptions options(cx);
options.setFileAndLine(this->mPACURI.get(), 1);
return JS::CompileLatin1(cx, options, this->mPACScript.get(),
this->mPACScript.Length(), script);
// Per ProxyAutoConfig::Init, compile as UTF-8 if the full data is UTF-8,
// and otherwise inflate Latin-1 to UTF-16 and compile that.
const char *scriptData = this->mConcatenatedPACData.get();
size_t scriptLength = this->mConcatenatedPACData.Length();
if (mozilla::IsValidUtf8(scriptData, scriptLength)) {
return JS::CompileUtf8(cx, options, scriptData, scriptLength, script);
}
// nsReadableUtils.h says that "ASCII" is a misnomer "for legacy reasons",
// and this handles not just ASCII but Latin-1 too.
NS_ConvertASCIItoUTF16 inflated(this->mConcatenatedPACData);
JS::SourceText<char16_t> source;
if (!source.init(cx, inflated.get(), inflated.Length(),
JS::SourceOwnership::Borrowed)) {
return false;
}
return JS::Compile(cx, options, source, script);
};
JS::Rooted<JSScript *> script(cx);
@ -735,7 +771,7 @@ nsresult ProxyAutoConfig::SetupJS() {
PACLogToConsole(alertMessage);
// we don't need these now
mPACScript.Truncate();
mConcatenatedPACData.Truncate();
mPACURI.Truncate();
return NS_OK;

View File

@ -31,7 +31,7 @@ class ProxyAutoConfig {
ProxyAutoConfig();
~ProxyAutoConfig();
nsresult Init(const nsCString &aPACURI, const nsCString &aPACScript,
nsresult Init(const nsCString &aPACURI, const nsCString &aPACScriptData,
bool aIncludePath, uint32_t aExtraHeapSize,
nsIEventTarget *aEventTarget);
void SetThreadLocalIndex(uint32_t index);
@ -92,7 +92,7 @@ class ProxyAutoConfig {
JSContextWrapper *mJSContext;
bool mJSNeedsSetup;
bool mShutdown;
nsCString mPACScript;
nsCString mConcatenatedPACData;
nsCString mPACURI;
bool mIncludePath;
uint32_t mExtraHeapSize;

View File

@ -240,10 +240,10 @@ class ExecutePACThreadAction final : public Runnable {
mShutdown = aShutdown;
}
void SetupPAC(const char *text, uint32_t datalen, const nsACString &pacURI,
void SetupPAC(const char *data, uint32_t dataLen, const nsACString &pacURI,
uint32_t extraHeapSize) {
mSetupPAC = true;
mSetupPACData.Assign(text, datalen);
mSetupPACData.Assign(data, dataLen);
mSetupPACURI = pacURI;
mExtraHeapSize = extraHeapSize;
}
@ -815,18 +815,13 @@ nsPACMan::OnStreamComplete(nsIStreamLoader *loader, nsISupports *context,
}
}
// We assume that the PAC text is ASCII (or ISO-Latin-1). We've had this
// assumption forever, and some real-world PAC scripts actually have some
// non-ASCII text in comment blocks (see bug 296163).
const char *text = (const char *)data;
// we have succeeded in loading the pac file using a bunch of interfaces
// that are main thread only, unfortunately we have to initialize the
// instance of the PAC evaluator (NS_PROXYAUTOCONFIG_CONTRACTID) on the pac
// thread, because that is where it will be used.
// We succeeded in loading the pac file using a bunch of interfaces that are
// main thread only. Unfortunately, we have to initialize the instance of
// the PAC evaluator (NS_PROXYAUTOCONFIG_CONTRACTID) on the PAC thread,
// because that's where it will be used.
RefPtr<ExecutePACThreadAction> pending = new ExecutePACThreadAction(this);
pending->SetupPAC(text, dataLen, pacURI, GetExtraJSContextHeapSize());
pending->SetupPAC(reinterpret_cast<const char *>(data), dataLen, pacURI,
GetExtraJSContextHeapSize());
DispatchToPAC(pending.forget());
LOG(("OnStreamComplete: process the PAC contents\n"));

View File

@ -600,6 +600,56 @@ function run_pac4_test() {
prefs.setIntPref("network.proxy.type", 2);
prefs.setCharPref("network.proxy.autoconfig_url", pac);
var req = pps.asyncResolve(channel, 0, new TestResolveCallback("http", run_utf8_pac_test));
}
function run_utf8_pac_test() {
var pac = 'data:text/plain;charset=UTF-8,' +
'function FindProxyForURL(url, host) {' +
' /*' +
' U+00A9 COPYRIGHT SIGN: %C2%A9,' +
' U+0B87 TAMIL LETTER I: %E0%AE%87,' +
' U+10398 UGARITIC LETTER THANNA: %F0%90%8E%98 ' +
' */' +
' var multiBytes = "%C2%A9 %E0%AE%87 %F0%90%8E%98"; ' +
' /* 6 UTF-16 units above if PAC script run as UTF-8; 11 units if run as Latin-1 */ ' +
' return multiBytes.length === 6 ' +
' ? "PROXY foopy:8080; DIRECT" ' +
' : "PROXY epicfail-utf8:12345; DIRECT";' +
'}';
var channel = NetUtil.newChannel({
uri: "http://www.mozilla.org/",
loadUsingSystemPrincipal: true
});
// Configure PAC
prefs.setIntPref("network.proxy.type", 2);
prefs.setCharPref("network.proxy.autoconfig_url", pac);
var req = pps.asyncResolve(channel, 0, new TestResolveCallback("http", run_latin1_pac_test));
}
function run_latin1_pac_test() {
var pac = 'data:text/plain,' +
'function FindProxyForURL(url, host) {' +
' /* A too-long encoding of U+0000, so not valid UTF-8 */ ' +
' var multiBytes = "%C0%80"; ' +
' /* 2 UTF-16 units because interpreted as Latin-1 */ ' +
' return multiBytes.length === 2 ' +
' ? "PROXY foopy:8080; DIRECT" ' +
' : "PROXY epicfail-latin1:12345; DIRECT";' +
'}';
var channel = NetUtil.newChannel({
uri: "http://www.mozilla.org/",
loadUsingSystemPrincipal: true
});
// Configure PAC
prefs.setIntPref("network.proxy.type", 2);
prefs.setCharPref("network.proxy.autoconfig_url", pac);
var req = pps.asyncResolve(channel, 0, new TestResolveCallback("http", finish_pac_test));
}