mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-10-10 03:45:46 +00:00
Bug 1561573 - Avoid linearization and inflation to UTF-16 of the string input to TextEncoder. r=jandem,bzbarsky
Differential Revision: https://phabricator.services.mozilla.com/D44121 --HG-- extra : moz-landing-system : lando
This commit is contained in:
parent
6c949a612d
commit
75e563c076
@ -13,33 +13,39 @@ namespace mozilla {
|
||||
namespace dom {
|
||||
|
||||
void TextEncoder::Encode(JSContext* aCx, JS::Handle<JSObject*> aObj,
|
||||
const nsAString& aString,
|
||||
JS::Handle<JSString*> aString,
|
||||
JS::MutableHandle<JSObject*> aRetval,
|
||||
ErrorResult& aRv) {
|
||||
// Given nsTSubstring<char16_t>::kMaxCapacity, it should be
|
||||
// impossible for the length computation to overflow, but
|
||||
// let's use checked math in case someone changes something
|
||||
// in the future.
|
||||
CheckedInt<size_t> bufLen(JS::GetStringLength(aString));
|
||||
bufLen *= 3; // from the contract for JS_EncodeStringToUTF8BufferPartial
|
||||
// Uint8Array::Create takes uint32_t as the length.
|
||||
CheckedInt<uint32_t> bufLen(aString.Length());
|
||||
bufLen *= 3; // from the contract for ConvertUTF16toUTF8
|
||||
if (!bufLen.isValid()) {
|
||||
if (!bufLen.isValid() || bufLen.value() > UINT32_MAX) {
|
||||
aRv.Throw(NS_ERROR_OUT_OF_MEMORY);
|
||||
return;
|
||||
}
|
||||
|
||||
// TODO: Avoid malloc and use a stack-allocated buffer if bufLen
|
||||
// is small.
|
||||
auto data = mozilla::MakeUniqueFallible<uint8_t[]>(bufLen.value());
|
||||
if (!data) {
|
||||
aRv.Throw(NS_ERROR_OUT_OF_MEMORY);
|
||||
return;
|
||||
}
|
||||
|
||||
size_t utf8Len = ConvertUtf16toUtf8(
|
||||
aString, MakeSpan(reinterpret_cast<char*>(data.get()), bufLen.value()));
|
||||
MOZ_ASSERT(utf8Len <= bufLen.value());
|
||||
size_t read;
|
||||
size_t written;
|
||||
auto maybe = JS_EncodeStringToUTF8BufferPartial(
|
||||
aCx, aString, AsWritableChars(MakeSpan(data.get(), bufLen.value())));
|
||||
if (!maybe) {
|
||||
aRv.Throw(NS_ERROR_OUT_OF_MEMORY);
|
||||
return;
|
||||
}
|
||||
Tie(read, written) = *maybe;
|
||||
MOZ_ASSERT(written <= bufLen.value());
|
||||
MOZ_ASSERT(read == JS::GetStringLength(aString));
|
||||
|
||||
JSAutoRealm ar(aCx, aObj);
|
||||
JSObject* outView = Uint8Array::Create(aCx, utf8Len, data.get());
|
||||
JSObject* outView = Uint8Array::Create(aCx, written, data.get());
|
||||
if (!outView) {
|
||||
aRv.Throw(NS_ERROR_OUT_OF_MEMORY);
|
||||
return;
|
||||
@ -48,18 +54,26 @@ void TextEncoder::Encode(JSContext* aCx, JS::Handle<JSObject*> aObj,
|
||||
aRetval.set(outView);
|
||||
}
|
||||
|
||||
void TextEncoder::EncodeInto(const nsAString& aSrc, const Uint8Array& aDst,
|
||||
TextEncoderEncodeIntoResult& aResult) {
|
||||
void TextEncoder::EncodeInto(JSContext* aCx, JS::Handle<JSString*> aSrc,
|
||||
const Uint8Array& aDst,
|
||||
TextEncoderEncodeIntoResult& aResult,
|
||||
OOMReporter& aError) {
|
||||
aDst.ComputeLengthAndData();
|
||||
size_t read;
|
||||
size_t written;
|
||||
Tie(read, written) = ConvertUtf16toUtf8Partial(
|
||||
aSrc, MakeSpan(reinterpret_cast<char*>(aDst.Data()), aDst.Length()));
|
||||
auto maybe = JS_EncodeStringToUTF8BufferPartial(
|
||||
aCx, aSrc, AsWritableChars(MakeSpan(aDst.Data(), aDst.Length())));
|
||||
if (!maybe) {
|
||||
aError.ReportOOM();
|
||||
return;
|
||||
}
|
||||
Tie(read, written) = *maybe;
|
||||
MOZ_ASSERT(written <= aDst.Length());
|
||||
aResult.mRead.Construct() = read;
|
||||
aResult.mWritten.Construct() = written;
|
||||
}
|
||||
|
||||
void TextEncoder::GetEncoding(nsAString& aEncoding) {
|
||||
void TextEncoder::GetEncoding(nsACString& aEncoding) {
|
||||
aEncoding.AssignLiteral("utf-8");
|
||||
}
|
||||
|
||||
|
@ -40,7 +40,7 @@ class TextEncoder final : public NonRefcountedDOMObject {
|
||||
*
|
||||
* @param aEncoding, current encoding.
|
||||
*/
|
||||
void GetEncoding(nsAString& aEncoding);
|
||||
void GetEncoding(nsACString& aEncoding);
|
||||
|
||||
/**
|
||||
* Encodes incoming utf-16 code units/ DOM string to utf-8.
|
||||
@ -52,11 +52,12 @@ class TextEncoder final : public NonRefcountedDOMObject {
|
||||
* the aRetval out param.
|
||||
*/
|
||||
void Encode(JSContext* aCx, JS::Handle<JSObject*> aObj,
|
||||
const nsAString& aString, JS::MutableHandle<JSObject*> aRetval,
|
||||
ErrorResult& aRv);
|
||||
JS::Handle<JSString*> aString,
|
||||
JS::MutableHandle<JSObject*> aRetval, ErrorResult& aRv);
|
||||
|
||||
void EncodeInto(const nsAString& aSrc, const Uint8Array& aDst,
|
||||
TextEncoderEncodeIntoResult& aResult);
|
||||
void EncodeInto(JSContext* aCx, JS::Handle<JSString*> aSrc,
|
||||
const Uint8Array& aDst, TextEncoderEncodeIntoResult& aResult,
|
||||
OOMReporter& aError);
|
||||
};
|
||||
|
||||
} // namespace dom
|
||||
|
167
dom/encoding/test/unit/test_rope_encode.js
Normal file
167
dom/encoding/test/unit/test_rope_encode.js
Normal file
@ -0,0 +1,167 @@
|
||||
// Test vectors for encoding rope strings with TextEncoder.
//
// Each entry describes one test case:
//   head, tail - the two strings that are joined into a rope
//   expected   - the string obtained after encoding the rope to UTF-8 and
//                decoding the bytes again; unpaired surrogates are expected
//                to come back as U+FFFD, while a surrogate pair split across
//                head and tail is expected to survive intact
//   name       - the test name
//
// NOTE(review): "Latin1" and "UTF-16" in the names presumably refer to the
// engine's internal storage of each half - confirm.
var concat = [
  // Plain concatenations and surrogate pairs split across the two halves,
  // followed by an unpaired high surrogate at the end of the head.
  {
    head: "a",
    tail: "b",
    expected: "ab",
    name: "Latin1 and Latin1",
  },
  {
    head: "α",
    tail: "β",
    expected: "αβ",
    name: "UTF-16 and UTF-16",
  },
  {
    head: "a",
    tail: "β",
    expected: "aβ",
    name: "Latin1 and UTF-16",
  },
  {
    head: "α",
    tail: "b",
    expected: "αb",
    name: "UTF-16 and Latin1",
  },
  {
    head: "\uD83D",
    tail: "\uDE03",
    expected: "\uD83D\uDE03",
    name: "Surrogate pair",
  },
  {
    head: "a\uD83D",
    tail: "\uDE03b",
    expected: "a\uD83D\uDE03b",
    name: "Surrogate pair with prefix and suffix",
  },
  {
    head: "\uD83D",
    tail: "b",
    expected: "\uFFFDb",
    name: "Unpaired high surrogate and Latin1",
  },
  {
    head: "a\uD83D",
    tail: "b",
    expected: "a\uFFFDb",
    name: "Prefixed unpaired high surrogate and Latin1",
  },
  {
    head: "\uD83D",
    tail: "β",
    expected: "\uFFFDβ",
    name: "Unpaired high surrogate and UTF-16",
  },
  {
    head: "a\uD83D",
    tail: "β",
    expected: "a\uFFFDβ",
    name: "Prefixed unpaired high surrogate and UTF-16",
  },

  // Unpaired low surrogate at the end of the head.
  {
    head: "\uDE03",
    tail: "b",
    expected: "\uFFFDb",
    name: "Unpaired low surrogate and Latin1",
  },
  {
    head: "a\uDE03",
    tail: "b",
    expected: "a\uFFFDb",
    name: "Prefixed unpaired low surrogate and Latin1",
  },
  {
    head: "\uDE03",
    tail: "β",
    expected: "\uFFFDβ",
    name: "Unpaired low surrogate and UTF-16",
  },
  {
    head: "a\uDE03",
    tail: "β",
    expected: "a\uFFFDβ",
    name: "Prefixed unpaired low surrogate and UTF-16",
  },

  // Unpaired low surrogate at the start of the tail.
  {
    head: "a",
    tail: "\uDE03",
    expected: "a\uFFFD",
    name: "Latin1 and unpaired low surrogate",
  },
  {
    head: "a",
    tail: "\uDE03b",
    expected: "a\uFFFDb",
    name: "Latin1 and suffixed unpaired low surrogate",
  },
  {
    head: "α",
    tail: "\uDE03",
    expected: "α\uFFFD",
    name: "UTF-16 and unpaired low surrogate",
  },
  {
    head: "α",
    tail: "\uDE03b",
    expected: "α\uFFFDb",
    name: "UTF-16 and suffixed unpaired low surrogate",
  },

  // Unpaired high surrogate at the start of the tail.
  {
    head: "a",
    tail: "\uD83D",
    expected: "a\uFFFD",
    name: "Latin1 and unpaired high surrogate",
  },
  {
    head: "a",
    tail: "\uD83Db",
    expected: "a\uFFFDb",
    name: "Latin1 and suffixed unpaired high surrogate",
  },
  {
    head: "α",
    tail: "\uD83D",
    expected: "α\uFFFD",
    name: "UTF-16 and unpaired high surrogate",
  },
  {
    head: "α",
    tail: "\uD83Db",
    expected: "α\uFFFDb",
    name: "UTF-16 and suffixed unpaired high surrogate",
  },
];
|
||||
|
||||
// Testing helpers obtained from Cu.getJSTestingFunctions(); newRope() and
// isSameCompartment() are the ones used in this file.
var testingFunctions = Cu.getJSTestingFunctions();

// Register one test per entry of `concat`: join head and tail into a rope,
// encode it with TextEncoder, decode the bytes with TextDecoder and compare
// the result with the entry's expected string.
concat.forEach(function(t) {
  test(function() {
    // NOTE(review): presumably ensures the rope is created in the same
    // compartment as this test scope, so that it reaches encode() still as a
    // rope - confirm.
    assert_true(
      testingFunctions.isSameCompartment(testingFunctions.newRope, this),
      "Must be in the same compartment"
    );
    var rope = testingFunctions.newRope(t.head, t.tail);
    var encoded = new TextEncoder().encode(rope);
    var decoded = new TextDecoder().decode(encoded);
    assert_equals(decoded, t.expected, "Must round-trip");
  }, t.name);
});
|
||||
|
||||
// Encodes a rope built from nested ropes. The `ab` rope is used as a child
// twice (inside `abc` and again directly under `abcdefab`), so the structure
// is a DAG rather than a tree; the decoded output must still be "abcdefab".
test(function() {
  assert_true(
    testingFunctions.isSameCompartment(testingFunctions.newRope, this),
    "Must be in the same compartment"
  );
  var ab = testingFunctions.newRope("a", "b");
  var abc = testingFunctions.newRope(ab, "c");
  var ef = testingFunctions.newRope("e", "f");
  var def = testingFunctions.newRope("d", ef);
  var abcdef = testingFunctions.newRope(abc, def);
  // Reuse `ab` so that the same node is reachable through two parents.
  var abcdefab = testingFunctions.newRope(abcdef, ab);
  var encoded = new TextEncoder().encode(abcdefab);
  var decoded = new TextDecoder().decode(encoded);
  assert_equals(decoded, "abcdefab", "Must walk the DAG correctly");
}, "Complex rope DAG");
|
@ -9,3 +9,4 @@ head = head.js
|
||||
[test_misc.js]
|
||||
[test_shift_jis.js]
|
||||
[test_utf.js]
|
||||
[test_rope_encode.js]
|
||||
|
@ -19,21 +19,28 @@ dictionary TextEncoderEncodeIntoResult {
|
||||
interface TextEncoder {
|
||||
constructor();
|
||||
|
||||
/*
|
||||
* This is DOMString in the spec, but the value is always ASCII
|
||||
* and short. By declaring this as ByteString, we get the same
|
||||
* end result (storage as inline Latin1 string in SpiderMonkey)
|
||||
* with fewer conversions.
|
||||
*/
|
||||
[Constant]
|
||||
readonly attribute DOMString encoding;
|
||||
readonly attribute ByteString encoding;
|
||||
|
||||
/*
|
||||
* This is spec-wise USVString but marking it as
|
||||
* DOMString to avoid duplicate work. Since the
|
||||
* UTF-16 to UTF-8 converter performs processing
|
||||
* that's equivalent to first converting a
|
||||
* DOMString to a USVString, let's avoid having
|
||||
* the binding code doing it, too.
|
||||
* JSString as an optimization. (The SpiderMonkey-provided
|
||||
* conversion to UTF-8 takes care of replacing lone
|
||||
* surrogates with the REPLACEMENT CHARACTER, so the
|
||||
* observable behavior of USVString is matched.)
|
||||
*/
|
||||
[NewObject]
|
||||
Uint8Array encode(optional DOMString input = "");
|
||||
Uint8Array encode(optional JSString input = "");
|
||||
|
||||
/*
|
||||
* The same comment about USVString as above applies here.
|
||||
*/
|
||||
TextEncoderEncodeIntoResult encodeInto(DOMString source, Uint8Array destination);
|
||||
[CanOOM]
|
||||
TextEncoderEncodeIntoResult encodeInto(JSString source, Uint8Array destination);
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user