Bug 1257877 - Remove UTF-16 support from TextEncoder. r=hsivonen

This commit is contained in:
Masatoshi Kimura 2016-03-24 19:27:15 +09:00
parent 4029855eed
commit 160a5b6fb7
7 changed files with 78 additions and 165 deletions

View File

@ -32,7 +32,7 @@
const { Buffer, TextEncoder, TextDecoder } = require('sdk/io/buffer');
const { safeMerge } = require('sdk/util/object');
const ENCODINGS = ['utf-8', 'utf-16le', 'utf-16be'];
const ENCODINGS = ['utf-8'];
exports.testBufferMain = function (assert) {
let b = Buffer('abcdef');
@ -246,9 +246,6 @@ exports.testBufferWrite = function (assert) {
let b = Buffer(1024);
b.fill(0);
assert.throws(() => {
b.write('test string', 0, 5, 'invalid');
}, RangeError, 'invalid encoding with buffer write throws');
// try to write a 0-length string beyond the end of b
assert.throws(function() {
b.write('', 2048);

View File

@ -13,28 +13,10 @@ namespace mozilla {
namespace dom {
void
TextEncoder::Init(const nsAString& aEncoding, ErrorResult& aRv)
TextEncoder::Init()
{
nsAutoString label(aEncoding);
EncodingUtils::TrimSpaceCharacters(label);
// Let encoding be the result of getting an encoding from label.
// If encoding is failure, or is none of utf-8, utf-16, and utf-16be,
// throw a RangeError (https://encoding.spec.whatwg.org/#dom-textencoder).
if (!EncodingUtils::FindEncodingForLabel(label, mEncoding)) {
aRv.ThrowRangeError<MSG_ENCODING_NOT_SUPPORTED>(label);
return;
}
if (!mEncoding.EqualsLiteral("UTF-8") &&
!mEncoding.EqualsLiteral("UTF-16LE") &&
!mEncoding.EqualsLiteral("UTF-16BE")) {
aRv.ThrowRangeError<MSG_DOM_ENCODING_NOT_UTF>();
return;
}
// Create an encoder object for mEncoding.
mEncoder = EncodingUtils::EncoderForEncoding(mEncoding);
// Create an encoder object for utf-8.
mEncoder = EncodingUtils::EncoderForEncoding(NS_LITERAL_CSTRING("UTF-8"));
}
void
@ -92,8 +74,7 @@ TextEncoder::Encode(JSContext* aCx,
void
TextEncoder::GetEncoding(nsAString& aEncoding)
{
CopyASCIItoUTF16(mEncoding, aEncoding);
nsContentUtils::ASCIIToLower(aEncoding);
aEncoding.AssignLiteral("utf-8");
}
} // namespace dom

View File

@ -24,14 +24,10 @@ public:
static TextEncoder*
Constructor(const GlobalObject& aGlobal,
const nsAString& aEncoding,
ErrorResult& aRv)
{
nsAutoPtr<TextEncoder> txtEncoder(new TextEncoder());
txtEncoder->Init(aEncoding, aRv);
if (aRv.Failed()) {
return nullptr;
}
txtEncoder->Init();
return txtEncoder.forget();
}
@ -50,16 +46,7 @@ public:
protected:
/**
* Validates provided encoding and throws an exception if invalid encoding.
* If no encoding is provided then mEncoding is default initialised to "utf-8".
*
* @param aEncoding Optional encoding (case insensitive) provided.
* (valid values are "utf-8", "utf-16", "utf-16be")
* Default value is "utf-8" if no encoding is provided.
* @return aRv EncodingError exception else null.
*/
void Init(const nsAString& aEncoding, ErrorResult& aRv);
void Init();
public:
/**
@ -70,7 +57,7 @@ public:
void GetEncoding(nsAString& aEncoding);
/**
* Encodes incoming utf-16 code units/ DOM string to the requested encoding.
* Encodes incoming utf-16 code units/ DOM string to utf-8.
*
* @param aCx Javascript context.
* @param aObj the wrapper of the TextEncoder
@ -84,7 +71,6 @@ public:
JS::MutableHandle<JSObject*> aRetval,
ErrorResult& aRv);
private:
nsCString mEncoding;
nsCOMPtr<nsIUnicodeEncoder> mEncoder;
};

View File

@ -4,6 +4,15 @@
*/
function runTextEncoderTests()
{
test(testEncoderEncode, "testEncoderEncode");
test(testEncoderGetEncoding, "testEncoderGetEncoding");
test(testInvalidSequence, "testInvalidSequence");
test(testInputString, "testInputString");
test(testStreamingOptions, "testStreamingOptions");
}
function testEncoderEncode()
{
var data = "\u00a0\u0e01\u0e02\u0e03\u0e04\u0e05\u0e06\u0e07\u0e08\u0e09"
+ "\u0e0a\u0e0b\u0e0c\u0e0d\u0e0e\u0e0f\u0e10\u0e11\u0e12\u0e13\u0e14"
@ -46,19 +55,9 @@ function runTextEncoderTests()
0xB9, 0x98, 0xE0, 0xB9, 0x99, 0xE0, 0xB9, 0x9A, 0xE0,
0xB9, 0x9B];
test(testEncoderGetEncoding, "testEncoderGetEncoding");
test(testInvalidSequence, "testInvalidSequence");
test(testEncodeUTF16ToUTF16, "testEncodeUTF16ToUTF16");
test(function() {
testConstructorEncodingOption(data, expectedString)
}, "testConstructorEncodingOption");
test(function() {
testEncodingValues(data, expectedString)
}, "testEncodingValues");
test(function() {
testInputString(data, expectedString)
}, "testInputString");
test(testStreamingOptions, "testStreamingOptions");
// valid encoding passed
testSingleString({input: data, expected: expectedString,
msg: "testing encoding with valid utf-8 encoding."});
}
function testInvalidSequence()
@ -68,77 +67,18 @@ function testInvalidSequence()
0xE0, 0xB9, 0x85];
//Test null input string
testSingleString({encoding: "utf-8", input: data, expected: expectedString,
testSingleString({input: data, expected: expectedString,
msg: "encoder with replacement character test."});
}
function testEncodeUTF16ToUTF16()
{
var data = "\u0e43\u0e44\u0e45\u0e46\u0e47\u0e48\u0e49\u0e4a\u0e4b\u0e4c"
+ "\u0e4d\u0e4e\u0e4f\u0e50\u0e51\u0e52\u0e53\u0e54\u0e55\u0e56"
+ "\u0e57\u0e58\u0e59\u0e5a\u0e5b";
var expected = [0x43, 0x0E, 0x44, 0x0E, 0x45, 0x0E, 0x46, 0x0E, 0x47, 0x0E,
0x48, 0x0E, 0x49, 0x0E, 0x4A, 0x0E, 0x4B, 0x0E, 0x4C, 0x0E,
0x4D, 0x0E, 0x4E, 0x0E, 0x4F, 0x0E, 0x50, 0x0E, 0x51, 0x0E,
0x52, 0x0E, 0x53, 0x0E, 0x54, 0x0E, 0x55, 0x0E, 0x56, 0x0E,
0x57, 0x0E, 0x58, 0x0E, 0x59, 0x0E, 0x5A, 0x0E, 0x5B, 0x0E];
testSingleString({encoding: "Utf-16", input: data, expected: expected,
msg: "testing encoding from utf-16 to utf-16 zero."});
}
function testConstructorEncodingOption(aData, aExpectedString)
{
function errorMessage(encoding) {
return `The given encoding '${String(encoding).trim()}' is not supported.`;
}
// valid encoding passed
var encoding = "UTF-8";
testSingleString({encoding: encoding, input: aData, expected: aExpectedString,
msg: "testing encoding with valid utf-8 encoding."});
// passing spaces for encoding
encoding = " ";
testSingleString({encoding: encoding, input: aData, error: "RangeError",
errorMessage: errorMessage(encoding),
msg: "constructor encoding, spaces encoding test."});
// invalid encoding passed
encoding = "asdfasdf";
testSingleString({encoding: encoding, input: aData, error: "RangeError",
errorMessage: errorMessage(encoding),
msg: "constructor encoding, invalid encoding test."});
// null encoding passed
encoding = null;
testSingleString({encoding: encoding, input: aData, error: "RangeError",
errorMessage: errorMessage(encoding),
msg: "constructor encoding, \"null\" encoding test."});
// empty encoding passed
encoding = "";
testSingleString({encoding: encoding, input: aData, error: "RangeError",
errorMessage: errorMessage(encoding),
msg: "constructor encoding, empty encoding test."});
}
function testEncodingValues(aData, aExpectedString)
{
var encoding = "ISO-8859-11";
testSingleString({encoding: aData, input: encoding, error: "RangeError",
msg: "encoder encoding values test."});
}
function testInputString(aData, aExpectedString)
function testInputString()
{
//Test null input string
testSingleString({encoding: "utf-8", input: "", expected: [],
testSingleString({input: "", expected: [],
msg: "encoder null input string test."});
//Test spaces as input string
testSingleString({encoding: "utf-8", input: " ", expected: [32, 32],
testSingleString({input: " ", expected: [32, 32],
msg: "spaces as input string."});
}
@ -147,7 +87,7 @@ function testSingleString(test)
var outText;
try {
var stream = test.stream ? {stream: true} : null;
outText = (new TextEncoder(test.encoding)).encode(test.input, stream);
outText = (new TextEncoder()).encode(test.input, stream);
} catch (e) {
assert_equals(e.name, test.error, test.msg + " error thrown from the constructor.");
if (test.errorMessage) {
@ -223,13 +163,9 @@ function testStreamingOptions()
0xE0, 0xB9, 0x98, 0xE0, 0xB9, 0x99, 0xE0, 0xB9, 0x9A,
0xE0, 0xB9, 0x9B]];
var expectedUTF16 = data.map(function(d) {
return new Uint8Array(new Uint16Array(arrayFromString(d)).buffer);
});
// STREAMING TEST ONE: test streaming three valid strings with stream option
// set to true for all three.
testArrayOfStrings({encoding: "utf-8", array: [
testArrayOfStrings({array: [
{input: data[0], stream: true, expected: expected[0]},
{input: data[1], stream: true, expected: expected[1]},
{input: data[2], stream: true, expected: expected[2]},
@ -238,10 +174,10 @@ function testStreamingOptions()
// STREAMING TEST TWO: test streaming valid strings with stream option
// streaming option: false from constructor, string 1 stream: true,
// string 2 stream: false, string 3 stream: false
testArrayOfStrings({encoding: "utf-16", array: [
{input: data[0], stream: true, expected: expectedUTF16[0]},
{input: data[1], expected: expectedUTF16[1]},
{input: data[2], expected: expectedUTF16[2]},
testArrayOfStrings({array: [
{input: data[0], stream: true, expected: expected[0]},
{input: data[1], expected: expected[1]},
{input: data[2], expected: expected[2]},
], msg: "streaming test two."});
}
@ -253,7 +189,7 @@ function testArrayOfStrings(test)
{
var encoder;
try {
encoder = new TextEncoder(test.encoding);
encoder = new TextEncoder();
} catch (e) {
assert_equals(e.name, test.error, test.msg);
return;
@ -278,16 +214,6 @@ function testArrayOfStrings(test)
function testEncoderGetEncoding()
{
var labelEncodings = [
{encoding: "utf-8", labels: ["unicode-1-1-utf-8", "utf-8", "utf8"]},
{encoding: "utf-16le", labels: ["utf-16", "utf-16"]},
{encoding: "utf-16be", labels: ["utf-16be"]},
];
for (var le of labelEncodings) {
for (var label of le.labels) {
var encoder = new TextEncoder(label);
assert_equals(encoder.encoding, le.encoding, label + " label encoding test.");
}
}
var encoder = new TextEncoder();
assert_equals(encoder.encoding, "utf-8", "TextEncoder encoding test.");
}

View File

@ -133,7 +133,17 @@ test(
function () {
["utf-8", "utf-16le", "utf-16be"].forEach(function (encoding) {
var string = "\x00123ABCabc\x80\xFF\u0100\u1000\uFFFD\uD800\uDC00\uDBFF\uDFFF";
var encoded = new TextEncoder(encoding).encode(string);
var octets = {
"utf-16le": [0x00,0x00,0x31,0x00,0x32,0x00,0x33,0x00,0x41,0x00,0x42,0x00,
0x43,0x00,0x61,0x00,0x62,0x00,0x63,0x00,0x80,0x00,0xFF,0x00,
0x00,0x01,0x00,0x10,0xFD,0xFF,0x00,0xD8,0x00,0xDC,0xFF,0xDB,
0xFF,0xDF],
"utf-16be": [0x00,0x00,0x00,0x31,0x00,0x32,0x00,0x33,0x00,0x41,0x00,0x42,
0x00,0x43,0x00,0x61,0x00,0x62,0x00,0x63,0x00,0x80,0x00,0xFF,
0x01,0x00,0x10,0x00,0xFF,0xFD,0xD8,0x00,0xDC,0x00,0xDB,0xFF,
0xDF,0xFF]
};
var encoded = octets[encoding] || new TextEncoder(encoding).encode(string);
for (var len = 1; len <= 5; ++len) {
var out = "", decoder = new TextDecoder(encoding);
@ -204,19 +214,12 @@ test(
test(
function () {
var utf_encodings = ["utf-8", "utf-16le", "utf-16be"];
var encodings = ["utf-8", "ibm866", "iso-8859-2", "iso-8859-3", "iso-8859-4", "iso-8859-5", "iso-8859-6", "iso-8859-7", "iso-8859-8", "iso-8859-8-i", "iso-8859-10", "iso-8859-13", "iso-8859-14", "iso-8859-15", "iso-8859-16", "koi8-r", "koi8-u", "macintosh", "windows-874", "windows-1250", "windows-1251", "windows-1252", "windows-1253", "windows-1254", "windows-1255", "windows-1256", "windows-1257", "windows-1258", "x-mac-cyrillic", "gbk", "gb18030", "big5", "euc-jp", "iso-2022-jp", "shift_jis", "euc-kr", "x-user-defined", "utf-16le", "utf-16be"];
var legacy_encodings = ["ibm866", "iso-8859-2", "iso-8859-3", "iso-8859-4", "iso-8859-5", "iso-8859-6", "iso-8859-7", "iso-8859-8", "iso-8859-8-i", "iso-8859-10", "iso-8859-13", "iso-8859-14", "iso-8859-15", "iso-8859-16", "koi8-r", "koi8-u", "macintosh", "windows-874", "windows-1250", "windows-1251", "windows-1252", "windows-1253", "windows-1254", "windows-1255", "windows-1256", "windows-1257", "windows-1258", "x-mac-cyrillic", "gbk", "gb18030", "big5", "euc-jp", "iso-2022-jp", "shift_jis", "euc-kr", "x-user-defined"];
utf_encodings.forEach(function(encoding) {
encodings.forEach(function(encoding) {
assert_equals(new TextDecoder(encoding).encoding, encoding);
assert_equals(new TextEncoder(encoding).encoding, encoding);
});
legacy_encodings.forEach(function(encoding) {
assert_equals(new TextDecoder(encoding).encoding, encoding);
assert_throws({name: 'RangeError'}, function() { new TextEncoder(encoding); });
assert_equals(new TextEncoder(encoding).encoding, "utf-8");
});
},
"Non-UTF encodings supported only for decode, not encode"
"Non-UTF-8 encodings supported only for decode, not encode"
);

View File

@ -42,6 +42,28 @@ function encode_utf8(string) {
return octets;
}
/**
 * Serialises a string to UTF-16LE without going through TextEncoder.
 *
 * Each UTF-16 code unit of the input (as reported by charCodeAt) becomes
 * two bytes, low byte first. Surrogates are written out as-is; no
 * validation or replacement is performed.
 *
 * @param string The string whose code units are serialised.
 * @return A Uint8Array of length string.length * 2.
 */
function encode_utf16le(string) {
  var bytes = new Uint8Array(string.length * 2);
  var view = new DataView(bytes.buffer);
  for (var unit = 0; unit < string.length; unit++) {
    // The third argument selects little-endian byte order.
    view.setUint16(unit * 2, string.charCodeAt(unit), true);
  }
  return bytes;
}
/**
 * Serialises a string to UTF-16BE without going through TextEncoder.
 *
 * Each UTF-16 code unit of the input (as reported by charCodeAt) becomes
 * two bytes, high byte first. Surrogates are written out as-is; no
 * validation or replacement is performed.
 *
 * @param string The string whose code units are serialised.
 * @return A Uint8Array of length string.length * 2.
 */
function encode_utf16be(string) {
  var bytes = new Uint8Array(string.length * 2);
  var view = new DataView(bytes.buffer);
  for (var unit = 0; unit < string.length; unit++) {
    // The third argument selects big-endian byte order.
    view.setUint16(unit * 2, string.charCodeAt(unit), false);
  }
  return bytes;
}
function decode_utf8(octets) {
var utf8 = String.fromCharCode.apply(null, octets);
return decodeURIComponent(escape(utf8));
@ -94,13 +116,11 @@ function test_utf_roundtrip () {
var block, block_tag, i, j, encoded, decoded, exp_encoded, exp_decoded;
var TE_U16LE = new TextEncoder("UTF-16LE");
var TD_U16LE = new TextDecoder("UTF-16LE");
var TE_U16BE = new TextEncoder("UTF-16BE");
var TD_U16BE = new TextDecoder("UTF-16BE");
var TE_U8 = new TextEncoder("UTF-8");
var TE_U8 = new TextEncoder();
var TD_U8 = new TextDecoder("UTF-8");
for (i = MIN_CODEPOINT; i < MAX_CODEPOINT; i += BLOCK_SIZE) {
@ -108,11 +128,11 @@ function test_utf_roundtrip () {
block = genblock(i, BLOCK_SIZE);
// test UTF-16LE, UTF-16BE, and UTF-8 encodings against themselves
encoded = TE_U16LE.encode(block);
encoded = encode_utf16le(block);
decoded = TD_U16LE.decode(encoded);
assert_string_equals(block, decoded, "UTF-16LE round trip " + block_tag);
encoded = TE_U16BE.encode(block);
encoded = encode_utf16be(block);
decoded = TD_U16BE.decode(encoded);
assert_string_equals(block, decoded, "UTF-16BE round trip " + block_tag);
@ -145,12 +165,12 @@ function test_utf_samples () {
expected: [0x00, 0x7A, 0x00, 0xA2, 0x6C, 0x34, 0xD8, 0x34, 0xDD, 0x1E, 0xDB, 0xFF, 0xDF, 0xFD] }
];
var encoded = new TextEncoder().encode(sample);
assert_array_equals(encoded, cases[0].expected,
"expected equal encodings");
cases.forEach(
function(t) {
var encoded = new TextEncoder(t.encoding).encode(sample);
assert_array_equals(encoded, t.expected,
"expected equal encodings - " + t.encoding);
var decoded = new TextDecoder(t.encoding)
.decode(new Uint8Array(t.expected));
assert_equals(decoded, sample,

View File

@ -10,7 +10,7 @@
* http://creativecommons.org/publicdomain/zero/1.0/
*/
[Constructor(optional DOMString utfLabel = "utf-8"),
[Constructor,
Exposed=(Window,Worker,System)]
interface TextEncoder {
[Constant]