Bug 933885 part 4b: split test_singlebytes.js into misc tests and roundtrip tests. r=emk

--HG--
rename : dom/encoding/test/unit/test_singlebytes.js => dom/encoding/test/unit/test_misc.js
rename : dom/encoding/test/unit/test_singlebytes.js => dom/encoding/test/unit/test_utf.js
This commit is contained in:
Zack Weinberg 2014-01-25 23:34:31 -05:00
parent d544230108
commit f9e62e8b02
3 changed files with 152 additions and 148 deletions

View File

@ -1,153 +1,6 @@
// NOTE: Requires testharness.js
// http://www.w3.org/2008/webapps/wiki/Harness
// Registers one testharness.js test that round-trips code points through
// TextEncoder/TextDecoder for the given encoding label.
//
// encoding: encoding label passed to both TextEncoder and TextDecoder.
// min:      first code point to test (inclusive).
// max:      end of the range (exclusive).
//
// Code points are processed in batches of BATCH_SIZE; each batch is turned
// into one string, encoded, decoded again and compared with the input.
function testEncodeDecode(encoding, min, max) {
  // Formats a code point as "U+XXXX", zero-padded to at least four
  // uppercase hex digits.
  function cpname(n) {
    var hex = n.toString(16).toUpperCase();
    return 'U+' + ((n <= 0xFFFF) ? ('0000' + hex).slice(-4) : hex);
  }

  test(
    function() {
      var string, i, j, offset, BATCH_SIZE = 0x1000;
      for (i = min; i < max; i += BATCH_SIZE) {
        string = '';
        for (j = i; j < i + BATCH_SIZE && j < max; j += 1) {
          if (0xd800 <= j && j <= 0xdfff) {
            // surrogate half
            continue;
          } else if (j > 0xffff) {
            // outside BMP - encode as surrogate pair. The pair carries the
            // 20-bit offset from U+10000, not the raw code point value.
            offset = j - 0x10000;
            string += String.fromCharCode(
              0xd800 + ((offset >> 10) & 0x3ff),
              0xdc00 + (offset & 0x3ff));
          } else {
            // Append the current code point (j), not the batch start (i);
            // otherwise the batch is one character repeated.
            string += String.fromCharCode(j);
          }
        }
        var encoded = new TextEncoder(encoding).encode(string);
        var decoded = new TextDecoder(encoding).decode(encoded);
        assert_equals(string, decoded, 'Round trip ' + cpname(i) + " - " + cpname(j));
      }
    },
    encoding + " - Encode/Decode Range " + cpname(min) + " - " + cpname(max)
  );
}
// Register the round-trip test for each UTF encoding over U+0000 up to
// (but not including) U+10FFFF; the end bound is exclusive.
['UTF-8', 'UTF-16LE', 'UTF-16BE'].forEach(function(label) {
  testEncodeDecode(label, 0, 0x10FFFF);
});
// Inspired by:
// http://ecmanaut.blogspot.com/2006/07/encoding-decoding-utf8-in-javascript.html
// Reference UTF-8 encoder built on encodeURIComponent()/unescape().
// Returns a plain Array with one number (0-255) per UTF-8 byte of `string`.
function encode_utf8(string) {
  // After unescape(), each character of the string holds one byte value.
  var byteString = unescape(encodeURIComponent(string));
  return Array.prototype.map.call(byteString, function(ch) {
    return ch.charCodeAt(0);
  });
}
// Reference UTF-8 decoder built on escape()/decodeURIComponent().
//
// octets: array-like of byte values (0-255).
// Returns the decoded string; decodeURIComponent() throws URIError when the
// bytes are not well-formed UTF-8.
function decode_utf8(octets) {
  // Build the byte string in bounded slices: handing an arbitrarily long
  // array to Function.prototype.apply can exceed the engine's argument limit.
  var CHUNK_SIZE = 0x2000;
  // A null/undefined argument yields '' just as apply(null, undefined) would.
  var length = (octets == null) ? 0 : octets.length;
  var utf8 = '', start;
  for (start = 0; start < length; start += CHUNK_SIZE) {
    utf8 += String.fromCharCode.apply(
      null, Array.prototype.slice.call(octets, start, start + CHUNK_SIZE));
  }
  // Multi-byte sequences that straddle a slice boundary are fine: decoding
  // happens once, on the fully assembled byte string.
  return decodeURIComponent(escape(utf8));
}
// Compares TextEncoder's UTF-8 output with the encode_utf8() reference
// helper for every non-surrogate code point, in batches of BATCH_SIZE.
test(
  function() {
    var actual, expected, str, i, j, offset, BATCH_SIZE = 0x1000;
    for (i = 0; i < 0x10FFFF; i += BATCH_SIZE) {
      str = '';
      for (j = i; j < i + BATCH_SIZE; j += 1) {
        if (0xd800 <= j && j <= 0xdfff) {
          // surrogate half
          continue;
        } else if (j > 0xffff) {
          // outside BMP - encode as surrogate pair. The pair carries the
          // 20-bit offset from U+10000, not the raw code point value.
          offset = j - 0x10000;
          str += String.fromCharCode(
            0xd800 + ((offset >> 10) & 0x3ff),
            0xdc00 + (offset & 0x3ff));
        } else {
          // Append the current code point (j), not the batch start (i);
          // otherwise the batch is one character repeated.
          str += String.fromCharCode(j);
        }
      }
      expected = encode_utf8(str);
      actual = new TextEncoder('UTF-8').encode(str);
      assert_array_equals(actual, expected, 'expected equal encodings');
    }
  },
  "UTF-8 encoding (compare against unescape/encodeURIComponent)"
);
// Compares TextDecoder's UTF-8 output with the decode_utf8() reference
// helper for every non-surrogate code point, in batches of BATCH_SIZE.
test(
  function() {
    var encoded, actual, expected, str, i, j, offset, BATCH_SIZE = 0x1000;
    for (i = 0; i < 0x10FFFF; i += BATCH_SIZE) {
      str = '';
      for (j = i; j < i + BATCH_SIZE; j += 1) {
        if (0xd800 <= j && j <= 0xdfff) {
          // surrogate half
          continue;
        } else if (j > 0xffff) {
          // outside BMP - encode as surrogate pair. The pair carries the
          // 20-bit offset from U+10000, not the raw code point value.
          offset = j - 0x10000;
          str += String.fromCharCode(
            0xd800 + ((offset >> 10) & 0x3ff),
            0xdc00 + (offset & 0x3ff));
        } else {
          // Append the current code point (j), not the batch start (i);
          // otherwise the batch is one character repeated.
          str += String.fromCharCode(j);
        }
      }
      // Bytes come from the reference encoder so both decoders see the
      // same input.
      encoded = encode_utf8(str);
      expected = decode_utf8(encoded);
      actual = new TextDecoder('UTF-8').decode(new Uint8Array(encoded));
      assert_equals(actual, expected, 'expected equal decodings');
    }
  },
  "UTF-8 decoding (compare against decodeURIComponent/escape)"
);
// Registers one testharness.js test that checks a fixed sample in both
// directions for the given encoding label.
//
// encoding: label passed to TextEncoder and TextDecoder.
// string:   the sample text.
// expected: array of byte values that `string` should encode to, and that
//           should decode back to `string`.
function testEncodeDecodeSample(encoding, string, expected) {
  var title = encoding + " - Encode/Decode - reference sample";

  function checkSample() {
    // Encoding direction: string -> bytes.
    var encoder = new TextEncoder(encoding);
    assert_array_equals(encoder.encode(string), expected, 'expected equal encodings ' + encoding);

    // Decoding direction: bytes -> string.
    var decoder = new TextDecoder(encoding);
    var text = decoder.decode(new Uint8Array(expected));
    assert_equals(text, string, 'expected equal decodings ' + encoding);
  }

  test(checkSample, title);
}
// Reference samples: the same text checked against its expected byte
// sequence in each encoding, registered in table order.
(function() {
  // z, cent, CJK water, G-Clef, Private-use character
  var sample = "z\xA2\u6C34\uD834\uDD1E\uDBFF\uDFFD";
  var cases = [
    ["utf-8",
     [0x7A, 0xC2, 0xA2, 0xE6, 0xB0, 0xB4, 0xF0, 0x9D, 0x84, 0x9E, 0xF4, 0x8F, 0xBF, 0xBD]],
    ["utf-16le",
     [0x7A, 0x00, 0xA2, 0x00, 0x34, 0x6C, 0x34, 0xD8, 0x1E, 0xDD, 0xFF, 0xDB, 0xFD, 0xDF]],
    ["utf-16be",
     [0x00, 0x7A, 0x00, 0xA2, 0x6C, 0x34, 0xD8, 0x34, 0xDD, 0x1E, 0xDB, 0xFF, 0xDF, 0xFD]],
    // "utf-16" is expected to produce the same bytes as "utf-16le".
    ["utf-16",
     [0x7A, 0x00, 0xA2, 0x00, 0x34, 0x6C, 0x34, 0xD8, 0x1E, 0xDD, 0xFF, 0xDB, 0xFD, 0xDF]]
  ];
  cases.forEach(function(entry) {
    testEncodeDecodeSample(entry[0], sample, entry[1]);
  });
})();
test(
function() {
var badStrings = [

View File

@ -0,0 +1,149 @@
// NOTE: Requires testharness.js
// http://www.w3.org/2008/webapps/wiki/Harness
// Registers one testharness.js test that round-trips code points through
// TextEncoder/TextDecoder for the given encoding label.
//
// encoding: encoding label passed to both TextEncoder and TextDecoder.
// min:      first code point to test (inclusive).
// max:      end of the range (exclusive).
//
// Code points are processed in batches of BATCH_SIZE; each batch is turned
// into one string, encoded, decoded again and compared with the input.
function testEncodeDecode(encoding, min, max) {
  // Formats a code point as "U+XXXX", zero-padded to at least four
  // uppercase hex digits.
  function cpname(n) {
    var hex = n.toString(16).toUpperCase();
    return 'U+' + ((n <= 0xFFFF) ? ('0000' + hex).slice(-4) : hex);
  }

  test(
    function() {
      var string, i, j, offset, BATCH_SIZE = 0x1000;
      for (i = min; i < max; i += BATCH_SIZE) {
        string = '';
        for (j = i; j < i + BATCH_SIZE && j < max; j += 1) {
          if (0xd800 <= j && j <= 0xdfff) {
            // surrogate half
            continue;
          } else if (j > 0xffff) {
            // outside BMP - encode as surrogate pair. The pair carries the
            // 20-bit offset from U+10000, not the raw code point value.
            offset = j - 0x10000;
            string += String.fromCharCode(
              0xd800 + ((offset >> 10) & 0x3ff),
              0xdc00 + (offset & 0x3ff));
          } else {
            // Append the current code point (j), not the batch start (i);
            // otherwise the batch is one character repeated.
            string += String.fromCharCode(j);
          }
        }
        var encoded = new TextEncoder(encoding).encode(string);
        var decoded = new TextDecoder(encoding).decode(encoded);
        assert_equals(string, decoded, 'Round trip ' + cpname(i) + " - " + cpname(j));
      }
    },
    encoding + " - Encode/Decode Range " + cpname(min) + " - " + cpname(max)
  );
}
// Register the round-trip test for each UTF encoding over U+0000 up to
// (but not including) U+10FFFF; the end bound is exclusive.
['UTF-8', 'UTF-16LE', 'UTF-16BE'].forEach(function(label) {
  testEncodeDecode(label, 0, 0x10FFFF);
});
// Inspired by:
// http://ecmanaut.blogspot.com/2006/07/encoding-decoding-utf8-in-javascript.html
// Reference UTF-8 encoder built on encodeURIComponent()/unescape().
// Returns a plain Array with one number (0-255) per UTF-8 byte of `string`.
function encode_utf8(string) {
  // After unescape(), each character of the string holds one byte value.
  var byteString = unescape(encodeURIComponent(string));
  return Array.prototype.map.call(byteString, function(ch) {
    return ch.charCodeAt(0);
  });
}
// Reference UTF-8 decoder built on escape()/decodeURIComponent().
//
// octets: array-like of byte values (0-255).
// Returns the decoded string; decodeURIComponent() throws URIError when the
// bytes are not well-formed UTF-8.
function decode_utf8(octets) {
  // Build the byte string in bounded slices: handing an arbitrarily long
  // array to Function.prototype.apply can exceed the engine's argument limit.
  var CHUNK_SIZE = 0x2000;
  // A null/undefined argument yields '' just as apply(null, undefined) would.
  var length = (octets == null) ? 0 : octets.length;
  var utf8 = '', start;
  for (start = 0; start < length; start += CHUNK_SIZE) {
    utf8 += String.fromCharCode.apply(
      null, Array.prototype.slice.call(octets, start, start + CHUNK_SIZE));
  }
  // Multi-byte sequences that straddle a slice boundary are fine: decoding
  // happens once, on the fully assembled byte string.
  return decodeURIComponent(escape(utf8));
}
// Compares TextEncoder's UTF-8 output with the encode_utf8() reference
// helper for every non-surrogate code point, in batches of BATCH_SIZE.
test(
  function() {
    var actual, expected, str, i, j, offset, BATCH_SIZE = 0x1000;
    for (i = 0; i < 0x10FFFF; i += BATCH_SIZE) {
      str = '';
      for (j = i; j < i + BATCH_SIZE; j += 1) {
        if (0xd800 <= j && j <= 0xdfff) {
          // surrogate half
          continue;
        } else if (j > 0xffff) {
          // outside BMP - encode as surrogate pair. The pair carries the
          // 20-bit offset from U+10000, not the raw code point value.
          offset = j - 0x10000;
          str += String.fromCharCode(
            0xd800 + ((offset >> 10) & 0x3ff),
            0xdc00 + (offset & 0x3ff));
        } else {
          // Append the current code point (j), not the batch start (i);
          // otherwise the batch is one character repeated.
          str += String.fromCharCode(j);
        }
      }
      expected = encode_utf8(str);
      actual = new TextEncoder('UTF-8').encode(str);
      assert_array_equals(actual, expected, 'expected equal encodings');
    }
  },
  "UTF-8 encoding (compare against unescape/encodeURIComponent)"
);
// Compares TextDecoder's UTF-8 output with the decode_utf8() reference
// helper for every non-surrogate code point, in batches of BATCH_SIZE.
test(
  function() {
    var encoded, actual, expected, str, i, j, offset, BATCH_SIZE = 0x1000;
    for (i = 0; i < 0x10FFFF; i += BATCH_SIZE) {
      str = '';
      for (j = i; j < i + BATCH_SIZE; j += 1) {
        if (0xd800 <= j && j <= 0xdfff) {
          // surrogate half
          continue;
        } else if (j > 0xffff) {
          // outside BMP - encode as surrogate pair. The pair carries the
          // 20-bit offset from U+10000, not the raw code point value.
          offset = j - 0x10000;
          str += String.fromCharCode(
            0xd800 + ((offset >> 10) & 0x3ff),
            0xdc00 + (offset & 0x3ff));
        } else {
          // Append the current code point (j), not the batch start (i);
          // otherwise the batch is one character repeated.
          str += String.fromCharCode(j);
        }
      }
      // Bytes come from the reference encoder so both decoders see the
      // same input.
      encoded = encode_utf8(str);
      expected = decode_utf8(encoded);
      actual = new TextDecoder('UTF-8').decode(new Uint8Array(encoded));
      assert_equals(actual, expected, 'expected equal decodings');
    }
  },
  "UTF-8 decoding (compare against decodeURIComponent/escape)"
);
// Registers one testharness.js test that checks a fixed sample in both
// directions for the given encoding label.
//
// encoding: label passed to TextEncoder and TextDecoder.
// string:   the sample text.
// expected: array of byte values that `string` should encode to, and that
//           should decode back to `string`.
function testEncodeDecodeSample(encoding, string, expected) {
  var title = encoding + " - Encode/Decode - reference sample";

  function checkSample() {
    // Encoding direction: string -> bytes.
    var encoder = new TextEncoder(encoding);
    assert_array_equals(encoder.encode(string), expected, 'expected equal encodings ' + encoding);

    // Decoding direction: bytes -> string.
    var decoder = new TextDecoder(encoding);
    var text = decoder.decode(new Uint8Array(expected));
    assert_equals(text, string, 'expected equal decodings ' + encoding);
  }

  test(checkSample, title);
}
// Reference samples: the same text checked against its expected byte
// sequence in each encoding, registered in table order.
(function() {
  // z, cent, CJK water, G-Clef, Private-use character
  var sample = "z\xA2\u6C34\uD834\uDD1E\uDBFF\uDFFD";
  var cases = [
    ["utf-8",
     [0x7A, 0xC2, 0xA2, 0xE6, 0xB0, 0xB4, 0xF0, 0x9D, 0x84, 0x9E, 0xF4, 0x8F, 0xBF, 0xBD]],
    ["utf-16le",
     [0x7A, 0x00, 0xA2, 0x00, 0x34, 0x6C, 0x34, 0xD8, 0x1E, 0xDD, 0xFF, 0xDB, 0xFD, 0xDF]],
    ["utf-16be",
     [0x00, 0x7A, 0x00, 0xA2, 0x6C, 0x34, 0xD8, 0x34, 0xDD, 0x1E, 0xDB, 0xFF, 0xDF, 0xFD]],
    // "utf-16" is expected to produce the same bytes as "utf-16le".
    ["utf-16",
     [0x7A, 0x00, 0xA2, 0x00, 0x34, 0x6C, 0x34, 0xD8, 0x1E, 0xDD, 0xFF, 0xDB, 0xFD, 0xDF]]
  ];
  cases.forEach(function(entry) {
    testEncodeDecodeSample(entry[0], sample, entry[1]);
  });
})();

View File

@ -2,6 +2,9 @@
head = head.js
tail =
[test_misc.js]
[test_utf.js]
[test_big5.js]
[test_euc-jp.js]
[test_euc-kr.js]
@ -9,4 +12,3 @@ tail =
[test_hz-gb-2312.js]
[test_iso-2022-jp.js]
[test_shift_jis.js]
[test_singlebytes.js]