mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-12-07 21:43:24 +00:00
b00ce35888
--HG-- rename : intl/uconv/ucvtw/nsBIG5DecoderData.h => intl/uconv/ucvtw/nsBIG5Data.cpp rename : testing/web-platform/tests/encoding/gbk-encoder.html => testing/web-platform/tests/encoding/big5-encoder.html
254 lines
5.4 KiB
Python
254 lines
5.4 KiB
Python
#!/usr/bin/python
|
|
|
|
# This Source Code Form is subject to the terms of the Mozilla Public
|
|
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
|
|
# Adapted from
|
|
# https://hg.mozilla.org/projects/htmlparser/file/0d906fb1ab90/generate-encoding-data.py
|
|
|
|
# indexes.json comes from
|
|
# https://encoding.spec.whatwg.org/indexes.json
|
|
# i.e.
|
|
# https://github.com/whatwg/encoding/blob/ce4e83d0df5b5efec0697fc76e66699737e033a3/indexes.json
|
|
|
|
import json
|
|
|
|
# Parse the encoding indexes from indexes.json in the current working
# directory. The context manager closes the file handle as soon as parsing
# is done instead of leaving it open until garbage collection.
with open("indexes.json", "r") as indexesFile:
    indexes = json.load(indexesFile)
|
|
|
|
def nullToZero(codePoint):
    """Return codePoint unchanged, or 0 when it is falsy.

    Any falsy input (None, 0, ...) yields 0; every other value is passed
    through as-is. Presumably the falsy case of interest is the JSON null
    that marks a pointer without a mapping -- confirm against indexes.json.
    """
    return codePoint if codePoint else 0
|
|
|
|
# Flat copy of the "big5" index in which every falsy entry has been
# replaced by 0, so the rest of the script can compare entries numerically.
index = [nullToZero(entry) for entry in indexes["big5"]]
|
|
|
|
# There are four major gaps consisting of more than 4 consecutive invalid pointers
#
# Each gap is stored as a half-open (start, end) pair of pointers. A run of
# zero entries is only recorded when a non-zero entry terminates it, so a
# run that reaches the very end of the index is not recorded.
gaps = []
runLength = 0
runStart = 0
for pointer, codePoint in enumerate(index):
    if codePoint != 0:
        # A mapped pointer ends the current run; keep it only if long enough.
        if runLength > 4:
            gaps.append((runStart, runStart + runLength))
        runLength = 0
        continue
    if runLength == 0:
        runStart = pointer
    runLength += 1
|
|
|
|
def invertRanges(ranges, cap):
    """Return the parts of [0, cap) that are not covered by `ranges`.

    ranges -- half-open (start, end) pairs; they are consumed in order, so
              they are expected to be sorted and non-overlapping.
    cap    -- exclusive upper bound of the whole interval.

    The result is a list of half-open (start, end) pairs. Empty pairs are
    never produced: a range that begins exactly where the previous one ended
    (or at 0) contributes nothing, and no trailing pair is appended when the
    last range already reaches `cap`.
    """
    inverted = []
    invertStart = 0
    for (start, end) in ranges:
        if start > invertStart:
            inverted.append((invertStart, start))
        invertStart = end
    if invertStart < cap:
        inverted.append((invertStart, cap))
    return inverted
|
|
|
|
# Pointer ranges that stay in the low-bits table: everything outside the
# gaps of invalid pointers found above.
cap = len(index)
ranges = invertRanges(gaps, cap)

# Now compute a compressed lookup table for astralness

# Same scan as for the invalid-pointer gaps, but this time a "gap" is a run
# of more than 40 consecutive entries that fit in 16 bits (<= 0xFFFF, which
# includes the zero entries). As before, a run that reaches the end of the
# index is not recorded.
gaps = []
nonAstralRun = 0
nonAstralStart = 0
for pointer, codePoint in enumerate(index):
    if codePoint > 0xFFFF:
        if nonAstralRun > 40:
            gaps.append((nonAstralStart, nonAstralStart + nonAstralRun))
        nonAstralRun = 0
    else:
        if nonAstralRun == 0:
            nonAstralStart = pointer
        nonAstralRun += 1

# Pointer ranges that need per-pointer astralness bits.
astralRanges = invertRanges(gaps, cap)
|
|
|
|
|
|
# Create the generated C++ source (path is relative to the current working
# directory) and emit the license, the "generated file" banner and the
# opening of the low-bits table.
classFile = open("../ucvtw/nsBIG5Data.cpp", "w")
classFile.write('''/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/*
* THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
* Instead, please regenerate using intl/uconv/tools/gen-big5-data.py
*/

#include "nsBIG5Data.h"

static const char16_t kBig5LowBitsTable[] = {
''')

# One table row per pointer of every retained range, holding the low 16 bits
# of the code point. Iterating over slices avoids the Python 2-only xrange,
# so this loop runs unchanged on Python 2 and Python 3.
for (low, high) in ranges:
    for entry in index[low:high]:
        classFile.write(' 0x%04X,\n' % (entry & 0xFFFF))
|
|
|
|
# Close the low-bits table and open the astralness bit table.
classFile.write('''};

static const uint32_t kBig5AstralnessTable[] = {
''')

# An array of bool is inefficient per
# http://stackoverflow.com/questions/4049156/1-bit-per-bool-in-array-c

# One bit per pointer of every astral range: 1 when the code point is above
# 0xFFFF, 0 otherwise.
bits = []
for (low, high) in astralRanges:
    for entry in index[low:high]:
        bits.append(1 if entry > 0xFFFF else 0)

# pad length to multiple of 32
# No padding is added when the length is already aligned (the previous
# formula appended a whole unused zero word in that case). An empty bit list
# still gets one zero word so the generated C++ array is never empty.
padding = -len(bits) % 32
if not bits:
    padding = 32
bits.extend([0] * padding)

# Pack 32 bits per table word, least significant bit first, matching the
# "index >> 5" / "1 << (index & 0x1F)" lookup emitted for IsAstral().
for wordStart in range(0, len(bits), 32):
    accu = 0
    for j in range(32):
        accu |= bits[wordStart + j] << j
    classFile.write(' 0x%08X,\n' % accu)
|
|
|
|
# Close the astralness table and open nsBIG5Data::LowBits().
classFile.write('''};

// static
char16_t
nsBIG5Data::LowBits(size_t aPointer)
{
''')

# Emitted once per retained range: pointers below the range (i.e. inside the
# preceding gap) yield 0, pointers inside it are read from the table at the
# range's offset within kBig5LowBitsTable.
lowBitsBranch = ''' if (aPointer < %d) {
return 0;
}
if (aPointer < %d) {
return kBig5LowBitsTable[%d + (aPointer - %d)];
}
'''

# Running offset of the current range inside kBig5LowBitsTable.
tableOffset = 0
for (low, high) in ranges:
    classFile.write(lowBitsBranch % (low, high, tableOffset, low))
    tableOffset += high - low
|
|
|
|
# Finish nsBIG5Data::LowBits() with its fallback and open
# nsBIG5Data::IsAstral().
classFile.write(''' return 0;
}

// static
bool
nsBIG5Data::IsAstral(size_t aPointer)
{
''')

# Branch for a range that covers exactly one pointer: the generated code
# answers true for that pointer without consulting the bit table.
# NOTE(review): this relies on the lone pointer actually being astral --
# confirm for ranges touching the start or end of the index.
singlePointerBranch = ''' if (aPointer < %d) {
return false;
}
if (aPointer == %d) {
return true;
}
'''

# Branch for a longer range: look the pointer up in the packed bit table.
bitmapBranch = ''' if (aPointer < %d) {
return false;
}
if (aPointer < %d) {
size_t index = %d + (aPointer - %d);
return kBig5AstralnessTable[index >> 5] & (1 << (index & 0x1F));
}
'''

# Running bit offset of the current range inside kBig5AstralnessTable. It
# advances for every range, including single-pointer ones, because the table
# holds bits for all of them.
bitOffset = 0
for (low, high) in astralRanges:
    span = high - low
    if span != 1:
        classFile.write(bitmapBranch % (low, high, bitOffset, low))
    else:
        classFile.write(singlePointerBranch % (low, low))
    bitOffset += span
|
|
|
|
# Finish nsBIG5Data::IsAstral() with its fallback and open
# nsBIG5Data::FindPointer() up to the switch over special-cased code points.
classFile.write(''' return false;
}

//static
size_t
nsBIG5Data::FindPointer(char16_t aLowBits, bool aIsAstral)
{
if (!aIsAstral) {
switch (aLowBits) {
''')

# First pointer past the lead bytes 0x81..0xA0 (157 pointers per lead byte).
# The code further down uses it to keep the HKSCS part of the index out of
# the encoder's search.
hkscsBound = (0xA1 - 0x81) * 157

# Code points for which the generated encoder must return the LAST pointer
# in the index rather than the first one the linear search would find.
preferLast = [
    0x2550,
    0x255E,
    0x2561,
    0x256A,
    0x5341,
    0x5345,
]

for codePoint in preferLast:
    # Python lists don't have .rindex() :-(
    # Scan backwards and stop at the first hit. range() is used instead of
    # the Python 2-only xrange so the script also runs on Python 3. A code
    # point that is absent from the index emits no case at all.
    for i in range(len(index) - 1, -1, -1):
        if index[i] == codePoint:
            classFile.write(''' case 0x%04X:
return %d;
''' % (codePoint, i))
            break

# Close the switch and the "if (!aIsAstral)" block; deliberately no trailing
# newline, the next write starts with one.
classFile.write(''' default:
break;
}
}''')
|
|
|
|
# Translate hkscsBound (a pointer) into an offset within kBig5LowBitsTable:
# walk the retained ranges, accumulating their sizes, until the one holding
# hkscsBound is found. If no range contains it, the search starts at 0.
tableOffset = 0
start = 0
for (low, high) in ranges:
    if low <= hkscsBound < high:
        # This is the first range we don't ignore and the
        # range that contains the first non-HKSCS pointer.
        # Avoid searching HKSCS.
        start = tableOffset + (hkscsBound - low)
        break
    tableOffset += high - low

# Open the linear search over the low-bits table, beginning at that offset.
searchLoopHeader = '''
for (size_t i = %d; i < MOZ_ARRAY_LENGTH(kBig5LowBitsTable); ++i) {
if (kBig5LowBitsTable[i] == aLowBits) {
size_t pointer;
'''
classFile.write(searchLoopHeader % start)
|
|
|
|
# Emit the if/else chain that converts a table index i back into a pointer.
# For each range, "delta" is the amount to add to a table index to obtain
# the pointer and "tableEnd" is the table offset just past the range. A
# branch for a range is written one iteration late (when the next range is
# visited), so that the final range can be emitted as the closing
# unconditional else block. Branches are only written once a range reaching
# past hkscsBound has been seen.
conditionalBranch = '''if (i < %d) {
pointer = i + %d;
} else '''

finalBranch = '''{
pointer = i + %d;
}'''

tableOffset = 0
prevTableEnd = 0
prevDelta = 0
emitting = False
for (low, high) in ranges:
    if emitting:
        classFile.write(conditionalBranch % (prevTableEnd, prevDelta))
    prevDelta = low - tableOffset
    tableOffset += high - low
    prevTableEnd = tableOffset
    if high > hkscsBound:
        emitting = True

classFile.write(finalBranch % prevDelta)
|
|
|
|
# Tail of nsBIG5Data::FindPointer(): accept the candidate pointer only when
# its astralness matches the request, close the search loop, and fall back
# to returning 0 when nothing matched.
findPointerTail = '''
if (aIsAstral == IsAstral(pointer)) {
return pointer;
}
}
}
return 0;
}
'''
classFile.write(findPointerTail)

# Flush and release the generated file.
classFile.close()
|