mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-08 20:47:44 +00:00
b1b7fa69b4
--HG-- extra : rebase_source : c5e0853fc64557f699d78c67f2907f6b48958563
429 lines
11 KiB
JavaScript
429 lines
11 KiB
JavaScript
// Shorthand for the XPCOM class and interface tables.
const Cc = Components.classes;
const Ci = Components.interfaces;

// Result code thrown by the UTF8/UTF16 readers in this file when they hit
// malformed input.
const NS_ERROR_ILLEGAL_VALUE = Components.results.NS_ERROR_ILLEGAL_VALUE;

// XPCOM constructors; bound in run_test() via Components.Constructor.
var BIS, BOS, _Pipe, COS, FIS, _SS, CIS;

// Directory containing the test data files; set in run_test().
var dataDir;

/**
 * Test entry point.
 *
 * Binds the XPCOM constructors used by the helpers in this file (each one is
 * created with Components.Constructor from a contract ID, an interface name
 * and the name of its initializer method), looks up the data directory, and
 * then runs every test in a fixed order.
 */
function run_test()
{
  BIS = Components.Constructor("@mozilla.org/binaryinputstream;1",
                               "nsIBinaryInputStream",
                               "setInputStream");
  BOS = Components.Constructor("@mozilla.org/binaryoutputstream;1",
                               "nsIBinaryOutputStream",
                               "setOutputStream");
  _Pipe = Components.Constructor("@mozilla.org/pipe;1",
                                 "nsIPipe",
                                 "init");
  COS = Components.Constructor("@mozilla.org/intl/converter-output-stream;1",
                               "nsIConverterOutputStream",
                               "init");
  FIS = Components.Constructor("@mozilla.org/network/file-input-stream;1",
                               "nsIFileInputStream",
                               "init");
  _SS = Components.Constructor("@mozilla.org/storagestream;1",
                               "nsIStorageStream",
                               "init");
  CIS = Components.Constructor("@mozilla.org/intl/converter-input-stream;1",
                               "nsIConverterInputStream",
                               "init");

  dataDir = do_get_file("data/");

  test_utf8_1();
  test_utf16_1();
  test_utf16_2();
  test_utf16_3();
  test_cross_conversion();
}

// Sample strings that the test_utf* functions write through each converter
// output stream and then decode again.
const UNICODE_STRINGS =
  [
    '\u00BD + \u00BE == \u00BD\u00B2 + \u00BC + \u00BE',

    'AZaz09 \u007F ' +                // U+000000 to U+00007F
    '\u0080 \u0398 \u03BB \u0725 ' +  // U+000080 to U+0007FF
    '\u0964 \u0F5F \u20AC \uFFFB'     // U+000800 to U+00FFFF

    // there would be strings containing non-BMP code points here, but
    // unfortunately JS strings are UCS-2 (and worse yet are treated as
    // 16-bit values by the spec), so we have to do gymnastics to work
    // with non-BMP -- manual surrogate decoding doesn't work because
    // String.prototype.charCodeAt() ignores surrogate pairs and only
    // returns 16-bit values
  ];

// test conversion equality -- keys are names of files containing equivalent
// Unicode data, values are the encoding of the file in the format expected by
// nsIConverter(In|Out)putStream.init
const UNICODE_FILES =
  {
    "unicode-conversion.utf8.txt":    "UTF-8",
    "unicode-conversion.utf16.txt":   "UTF-16",
    "unicode-conversion.utf16le.txt": "UTF-16LE",
    "unicode-conversion.utf16be.txt": "UTF-16BE"
  };

/**
 * Writes every entry of UNICODE_STRINGS through a "UTF-8" converter output
 * stream into a fresh pipe, then decodes the pipe's input side with the UTF8
 * reader and fails the test if the decoded values differ from
 * stringToCodePoints() of the source string.
 */
function test_utf8_1()
{
  for (var i = 0; i < UNICODE_STRINGS.length; i++)
  {
    var pipe = Pipe();
    var conv = new COS(pipe.outputStream, "UTF-8", 1024, 0x0);
    do_check_true(conv.writeString(UNICODE_STRINGS[i]));
    // close the writer before reading so the reader sees end-of-stream
    conv.close();

    if (!equal(new UTF8(pipe.inputStream),
               stringToCodePoints(UNICODE_STRINGS[i])))
    {
      do_throw("UNICODE_STRINGS[" + i + "] not handled correctly");
    }
  }
}

/**
 * Writes every entry of UNICODE_STRINGS through a "UTF-16" converter output
 * stream into a fresh pipe, then decodes the pipe's input side with the UTF16
 * reader in BOM-detecting mode (no endianness argument) and fails the test if
 * the decoded values differ from stringToCodePoints() of the source string.
 */
function test_utf16_1()
{
  for (var i = 0; i < UNICODE_STRINGS.length; i++)
  {
    var pipe = Pipe();
    var conv = new COS(pipe.outputStream, "UTF-16", 1024, 0x0);
    do_check_true(conv.writeString(UNICODE_STRINGS[i]));
    // close the writer before reading so the reader sees end-of-stream
    conv.close();

    if (!equal(new UTF16(pipe.inputStream),
               stringToCodePoints(UNICODE_STRINGS[i])))
    {
      do_throw("UNICODE_STRINGS[" + i + "] not handled correctly");
    }
  }
}

/**
 * Writes every entry of UNICODE_STRINGS through a "UTF-16LE" converter output
 * stream into a fresh pipe, then decodes the pipe's input side with the UTF16
 * reader in explicit little-endian mode and fails the test if the decoded
 * values differ from stringToCodePoints() of the source string.
 */
function test_utf16_2()
{
  for (var i = 0; i < UNICODE_STRINGS.length; i++)
  {
    var pipe = Pipe();
    var conv = new COS(pipe.outputStream, "UTF-16LE", 1024, 0x0);
    do_check_true(conv.writeString(UNICODE_STRINGS[i]));
    // close the writer before reading so the reader sees end-of-stream
    conv.close();

    if (!equal(new UTF16(pipe.inputStream, false),
               stringToCodePoints(UNICODE_STRINGS[i])))
    {
      do_throw("UNICODE_STRINGS[" + i + "] not handled correctly");
    }
  }
}

/**
 * Writes every entry of UNICODE_STRINGS through a "UTF-16BE" converter output
 * stream into a fresh pipe, then decodes the pipe's input side with the UTF16
 * reader in explicit big-endian mode and fails the test if the decoded values
 * differ from stringToCodePoints() of the source string.
 */
function test_utf16_3()
{
  for (var i = 0; i < UNICODE_STRINGS.length; i++)
  {
    var pipe = Pipe();
    var conv = new COS(pipe.outputStream, "UTF-16BE", 1024, 0x0);
    do_check_true(conv.writeString(UNICODE_STRINGS[i]));
    // close the writer before reading so the reader sees end-of-stream
    conv.close();

    if (!equal(new UTF16(pipe.inputStream, true),
               stringToCodePoints(UNICODE_STRINGS[i])))
    {
      do_throw("UNICODE_STRINGS[" + i + "] not handled correctly");
    }
  }
}

/**
 * Checks that every file in UNICODE_FILES decodes to the same text as every
 * other file (including itself).
 *
 * For each file fn1 the raw bytes are copied into a storage stream; then, for
 * each file fn2, those bytes are decoded using fn1's encoding and compared
 * against fn2 decoded straight from disk using fn2's encoding.  Fails the
 * test on the first pair that differs.
 */
function test_cross_conversion()
{
  for (var fn1 in UNICODE_FILES)
  {
    // Copy fn1's bytes into memory so they can be re-read from offset 0 once
    // per comparison below.
    var fin = getBinaryInputStream(fn1);
    var ss = StorageStream();

    var bos = new BOS(ss.getOutputStream(0));
    var av;
    while ((av = fin.available()) > 0)
    {
      var data = fin.readByteArray(av);
      bos.writeByteArray(data, data.length);
    }
    fin.close();
    bos.close();

    for (var fn2 in UNICODE_FILES)
    {
      var fin2 = getUnicharInputStream(fn2, UNICODE_FILES[fn2]);
      try
      {
        var unichar = new CIS(ss.newInputStream(0),
                              UNICODE_FILES[fn1], 8192, 0x0);
        try
        {
          if (!equalUnicharStreams(unichar, fin2))
          {
            do_throw("unequal streams: " +
                     UNICODE_FILES[fn1] + ", " +
                     UNICODE_FILES[fn2]);
          }
        }
        finally
        {
          // release the decoder even when the comparison fails
          unichar.close();
        }
      }
      finally
      {
        // release the file-backed stream even when the comparison fails
        fin2.close();
      }
    }
  }
}

// utility functions

/**
 * Creates a new storage stream by calling the _SS constructor with
 * (8192, 2^32 - 1, null).
 *
 * NOTE(review): per nsIStorageStream.init these are presumably the segment
 * size, the maximum size and the segment allocator -- confirm against the IDL.
 *
 * @returns the initialized storage stream
 */
function StorageStream()
{
  return new _SS(8192, Math.pow(2, 32) - 1, null);
}

/**
 * Opens a file from the test data directory and wraps it in a converter
 * input stream that decodes it from the given encoding.
 *
 * @param filename
 *   name of the file inside dataDir
 * @param encoding
 *   character encoding name handed to the converter input stream
 * @returns the converter input stream (CIS) reading the file
 */
function getUnicharInputStream(filename, encoding)
{
  var file = dataDir.clone();
  file.append(filename);

  const PR_RDONLY = 0x1;
  // rw-r--r--, spelled via parseInt because a legacy octal literal (0644) is
  // deprecated and a syntax error in strict-mode code
  const PERMS_FILE = parseInt("644", 8);
  var fis = new FIS(file, PR_RDONLY, PERMS_FILE,
                    Ci.nsIFileInputStream.CLOSE_ON_EOF);
  return new CIS(fis, encoding, 8192, 0x0);
}

/**
 * Opens a file from the test data directory and wraps it in a binary input
 * stream for raw byte access.
 *
 * @param filename
 *   name of the file inside dataDir
 * @param encoding
 *   unused; kept so existing callers keep working
 * @returns the binary input stream (BIS) reading the file
 */
function getBinaryInputStream(filename, encoding)
{
  var file = dataDir.clone();
  file.append(filename);

  const PR_RDONLY = 0x1;
  // rw-r--r--, spelled via parseInt because a legacy octal literal (0644) is
  // deprecated and a syntax error in strict-mode code
  const PERMS_FILE = parseInt("644", 8);
  var fis = new FIS(file, PR_RDONLY, PERMS_FILE,
                    Ci.nsIFileInputStream.CLOSE_ON_EOF);
  return new BIS(fis);
}

/**
 * Compares the values produced by a reader against an expected list.
 *
 * @param stream
 *   object whose readUnit() returns the next value, or a negative number at
 *   end of stream
 * @param codePoints
 *   array of expected values, in order
 * @returns true if the reader yields exactly the values in codePoints and
 *   then ends; false on the first mismatch or if the lengths differ
 */
function equal(stream, codePoints)
{
  var currIndex = 0;
  while (true)
  {
    var unit = stream.readUnit();
    if (unit < 0)
    {
      // end of stream: equal only if every expected value was consumed
      return currIndex === codePoints.length;
    }
    // reading past the end of codePoints yields undefined, which never
    // matches a numeric unit, so an over-long stream is reported as unequal
    if (unit !== codePoints[currIndex++])
      return false;
  }
}

/**
 * Compares the full text content of two unichar input streams.
 *
 * Each stream is read with readString(1024, out) until a read returns 0.
 * Text is buffered per stream and compared by content, so the two streams
 * may deliver the same text in differently sized chunks and still compare
 * equal.
 *
 * @param s1
 *   first stream; must provide readString(count, outparam)
 * @param s2
 *   second stream; must provide readString(count, outparam)
 * @returns true if both streams yield identical text, false otherwise
 *   (diagnostics are print()ed on a mismatch)
 */
function equalUnicharStreams(s1, s2)
{
  var out1 = {}, out2 = {};
  var buf1 = "", buf2 = "";
  var done1 = false, done2 = false;
  var r1 = 0, r2 = 0;

  while (true)
  {
    // refill whichever side has no pending text left
    if (buf1.length === 0 && !done1)
    {
      r1 = s1.readString(1024, out1);
      if (r1 === 0)
        done1 = true;
      else
        buf1 = out1.value;
    }
    if (buf2.length === 0 && !done2)
    {
      r2 = s2.readString(1024, out2);
      if (r2 === 0)
        done2 = true;
      else
        buf2 = out2.value;
    }

    if (done1 && done2)
      return true;

    // one stream ended while the other still has text: lengths differ
    if ((done1 && buf2.length > 0) || (done2 && buf1.length > 0))
    {
      print("r1: " + r1 + ", r2: " + r2);
      print(buf1.length);
      print(buf2.length);
      return false;
    }

    // compare the overlapping prefix and keep the remainder for the next pass
    var n = Math.min(buf1.length, buf2.length);
    if (buf1.substring(0, n) !== buf2.substring(0, n))
    {
      print("r1: " + r1 + ", r2: " + r2);
      print(buf1.length);
      print(buf2.length);
      return false;
    }
    buf1 = buf1.substring(n);
    buf2 = buf2.substring(n);
  }
}

/**
 * Converts a JS string into an array of Unicode code points.
 *
 * JS strings are sequences of 16-bit code units, so a valid high/low
 * surrogate pair is combined into the single non-BMP code point it encodes.
 * An unpaired surrogate is passed through unchanged as its 16-bit value.
 *
 * @param str
 *   the string to convert
 * @returns array of numeric code points, in string order
 */
function stringToCodePoints(str)
{
  var codePoints = [];
  for (var i = 0; i < str.length; i++)
  {
    var unit = str.charCodeAt(i);
    if (unit >= 0xD800 && unit <= 0xDBFF && i + 1 < str.length)
    {
      var next = str.charCodeAt(i + 1);
      if (next >= 0xDC00 && next <= 0xDFFF)
      {
        // high surrogate carries the top 10 bits, low surrogate the bottom 10
        codePoints.push(0x10000 + ((unit - 0xD800) << 10) + (next - 0xDC00));
        i++; // the low surrogate has been consumed
        continue;
      }
    }
    codePoints.push(unit);
  }
  return codePoints;
}

/**
 * Builds a mask with the n lowest bits set.
 *
 * @param n
 *   number of low bits to set
 * @returns 2^n - 1 (e.g. lowbits(6) is 0x3F)
 */
function lowbits(n)
{
  return Math.pow(2, n) - 1;
}

/**
 * Creates a new pipe by calling the _Pipe constructor with
 * (false, false, 1024, 10, null).
 *
 * NOTE(review): per nsIPipe.init these are presumably the non-blocking flags
 * for input and output, the segment size, the segment count and the segment
 * allocator -- confirm against the IDL.
 *
 * @returns the initialized pipe; callers use its inputStream and outputStream
 */
function Pipe()
{
  return new _Pipe(false, false, 1024, 10, null);
}

// complex charset readers

/**
 * Wraps a UTF-8 stream to allow access to the Unicode code points in it.
 *
 * @param stream
 *   the stream to wrap
 */
function UTF8(stream)
{
  this._stream = new BIS(stream);
}
UTF8.prototype =
{
  /**
   * Reads one UTF-8 encoded code point from the front of the stream.
   *
   * @returns the numeric code point, or -1 if the stream is at its end
   * @throws NS_ERROR_ILLEGAL_VALUE if the bytes are not a valid, shortest-form
   *   encoding of a code point (bad lead or continuation byte, overlong form,
   *   surrogate, or value above U+10FFFF); whatever read8() throws if the
   *   stream ends in the middle of a character
   */
  readUnit: function()
  {
    var str = this._stream;

    var c, c2, c3, c4, rv;

    // if at end of stream, must distinguish failure to read any bytes
    // (correct behavior) from failure to read some byte after the first
    // in the character
    try
    {
      c = str.read8();
    }
    catch (e)
    {
      return -1;
    }

    // single byte: U+000000 to U+00007F
    if (c < 0x80)
      return c;

    if (c < 0xC0) // c < 11000000
    {
      // byte doesn't have enough leading ones (must be at least two)
      throw NS_ERROR_ILLEGAL_VALUE;
    }

    c2 = str.read8();
    if (c2 >= 0xC0 || c2 < 0x80)
      throw NS_ERROR_ILLEGAL_VALUE; // not 10xxxxxx

    if (c < 0xE0) // c < 11100000
    {
      // two-byte between U+000080 and U+0007FF
      rv = ((lowbits(5) & c) << 6) +
            (lowbits(6) & c2);
      // no upper bounds-check needed, by previous lines; the lower bound
      // rejects overlong encodings
      if (rv >= 0x80)
        return rv;
      throw NS_ERROR_ILLEGAL_VALUE;
    }

    c3 = str.read8();
    if (c3 >= 0xC0 || c3 < 0x80)
      throw NS_ERROR_ILLEGAL_VALUE; // not 10xxxxxx

    if (c < 0xF0) // c < 11110000
    {
      // three-byte between U+000800 and U+00FFFF
      rv = ((lowbits(4) & c) << 12) +
           ((lowbits(6) & c2) << 6) +
            (lowbits(6) & c3);
      // no upper bounds-check needed, by previous lines; overlong forms
      // (below U+0800) and the surrogate range U+D800..U+DFFF are rejected
      if (rv >= 0xE000 ||
          (rv >= 0x800 && rv <= 0xD7FF))
        return rv;
      throw NS_ERROR_ILLEGAL_VALUE;
    }

    c4 = str.read8();
    if (c4 >= 0xC0 || c4 < 0x80)
      throw NS_ERROR_ILLEGAL_VALUE; // not 10xxxxxx

    if (c < 0xF8) // c < 11111000
    {
      // four-byte between U+010000 and U+10FFFF
      rv = ((lowbits(3) & c) << 18) +
           ((lowbits(6) & c2) << 12) +
           ((lowbits(6) & c3) << 6) +
            (lowbits(6) & c4);
      // need an upper bounds-check since 0x10FFFF isn't (2**n - 1)
      if (rv >= 0x10000 && rv <= 0x10FFFF)
        return rv;
      throw NS_ERROR_ILLEGAL_VALUE;
    }

    // 11111000 or greater -- no UTF-8 mapping
    throw NS_ERROR_ILLEGAL_VALUE;
  }
};

/**
 * Wraps a UTF-16 stream to allow access to the Unicode code points in it.
 *
 * @param stream
 *   the stream to wrap
 * @param bigEndian
 *   true for UTF-16BE, false for UTF-16LE, not present at all for UTF-16 with
 *   a byte-order mark
 */
function UTF16(stream, bigEndian)
{
  this._stream = new BIS(stream);
  if (arguments.length > 1)
  {
    this._bigEndian = bigEndian;
  }
  else
  {
    // No endianness given: consume the byte-order mark and derive it.
    // NOTE(review): this assumes read16() assembles the two bytes in
    // big-endian order, so a BOM read back as 0xFEFF means UTF-16BE.
    var bom = this._stream.read16();
    if (bom == 0xFEFF)
      this._bigEndian = true;
    else if (bom == 0xFFFE)
      this._bigEndian = false;
    else
      do_throw("missing BOM: " + bom.toString(16).toUpperCase());
  }
}
UTF16.prototype =
{
  /**
   * Reads one UTF-16 encoded code point from the front of the stream,
   * combining a surrogate pair into the non-BMP code point it encodes.
   *
   * @returns the numeric code point, or -1 if the stream is at its end
   * @throws NS_ERROR_ILLEGAL_VALUE if a low surrogate appears first or a high
   *   surrogate is not followed by a low surrogate; whatever read8() throws
   *   if the stream ends in the middle of a character
   */
  readUnit: function()
  {
    var str = this._stream;
    var b1, b2;

    // if at end of stream, must distinguish failure to read any bytes
    // (correct behavior) from failure to read some byte after the first
    // in the character
    try
    {
      b1 = str.read8();
    }
    catch (e)
    {
      return -1;
    }

    b2 = str.read8();

    var w1 = this._bigEndian
           ? (b1 << 8) + b2
           : (b2 << 8) + b1;

    if (w1 > 0xDBFF && w1 < 0xE000)
    {
      // second surrogate, but expecting none or first
      throw NS_ERROR_ILLEGAL_VALUE;
    }

    if (w1 > 0xD7FF && w1 < 0xDC00)
    {
      // non-BMP, use surrogate pair
      b1 = str.read8();
      b2 = str.read8();
      var w2 = this._bigEndian
             ? (b1 << 8) + b2
             : (b2 << 8) + b1;
      if (w2 < 0xDC00 || w2 > 0xDFFF)
        throw NS_ERROR_ILLEGAL_VALUE;

      // The high surrogate (w1) supplies the upper 10 bits and the low
      // surrogate (w2) the lower 10 bits of the offset from U+10000.  Two
      // 10-bit fields top out at exactly U+10FFFF, so no range check is
      // needed.
      return 0x10000 +
             ((lowbits(10) & w1) << 10) +
              (lowbits(10) & w2);
    }

    // non-surrogate
    return w1;
  }
};