mirror of
https://github.com/darlinghq/darling-JavaScriptCore.git
synced 2024-11-23 04:09:40 +00:00
207 lines
8.0 KiB
Python
207 lines
8.0 KiB
Python
#! /usr/bin/env python
|
|
|
|
# Copyright (C) 2016-2019 Apple Inc. All rights reserved.
|
|
#
|
|
# Redistribution and use in source and binary forms, with or without
|
|
# modification, are permitted provided that the following conditions
|
|
# are met:
|
|
#
|
|
# 1. Redistributions of source code must retain the above copyright
|
|
# notice, this list of conditions and the following disclaimer.
|
|
# 2. Redistributions in binary form must reproduce the above copyright
|
|
# notice, this list of conditions and the following disclaimer in the
|
|
# documentation and/or other materials provided with the distribution.
|
|
#
|
|
# THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
|
|
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
# DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
|
|
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
|
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
|
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
# This tool processes the Unicode Character Database file CaseFolding.txt to create
|
|
# canonicalization table as described in ECMAScript 6 standard in section
|
|
# "21.2.2.8.2 Runtime Semantics: Canonicalize()", step 2.
|
|
|
|
import optparse
|
|
import os
|
|
import re
|
|
import sys
|
|
|
|
# Text written verbatim at the top of the generated C++ file: license block,
# "generated file" banner, includes, and the opening of the JSC::Yarr namespaces.
header = """/*
* Copyright (C) 2016 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

// DO NOT EDIT! - This file was generated by generateYarrCanonicalizeUnicode

#include "config.h"
#include "YarrCanonicalize.h"

namespace JSC { namespace Yarr {

"""

# Text written verbatim at the end of the generated file; closes both namespaces
# opened by `header`.
footer = """} } // JSC::Yarr
"""

# Highest code point covered by the generated tables (inclusive).
MaxUnicode = 0x10ffff

# Matches CaseFolding.txt entries of the form "<code>; <status>; <mapping>"
# whose status is C or S. Lines with any other status do not match, and the
# mapping group captures a single hexadecimal code point.
commonAndSimpleLinesRE = re.compile(r"(?P<code>[0-9A-F]+)\s*;\s*[CS]\s*;\s*(?P<mapping>[0-9A-F]+)", re.IGNORECASE)
|
|
|
|
def openOrExit(path, mode):
    """Open *path* with *mode*, or report the failure and exit the process.

    The directory part of *path* is created first when it does not exist yet.
    On Python 3 the file is opened as UTF-8 text; on Python 2 the platform
    default is used.

    Args:
        path: File to open. May be a bare file name with no directory part.
        mode: Mode string passed through to open().

    Returns:
        The opened file object.

    Raises:
        SystemExit: With status 1, after printing a diagnostic, when the
            directory cannot be created or the file cannot be opened.
    """
    try:
        dirname = os.path.dirname(path)
        # A bare file name has an empty dirname; os.makedirs("") raises, so
        # only create the directory when there actually is one to create.
        if dirname and not os.path.isdir(dirname):
            os.makedirs(dirname)
        if sys.version_info.major >= 3:
            return open(path, mode, encoding="UTF-8")
        return open(path, mode)
    except (IOError, OSError) as e:
        # On Python 2 os.makedirs raises OSError, which is not an IOError
        # there, so both are listed explicitly.
        print("I/O error opening {0}, ({1}): {2}".format(path, e.errno, e.strerror))
        # sys.exit rather than exit(): the latter is only injected by the
        # site module and is not guaranteed to exist.
        sys.exit(1)
|
|
|
|
class Canonicalize:
    """Groups code points by case-folding target and emits C++ lookup tables.

    Typical use: call readCaseFolding() once with the CaseFolding.txt lines,
    then createTables() with a writable file object.
    """

    def __init__(self):
        # Maps a folding target (int) to the list of code points (ints) that
        # fold to it, in the order they were added.
        self.canonicalGroups = {}

    def addMapping(self, code, mapping):
        """Record that code point *code* folds to code point *mapping*."""
        self.canonicalGroups.setdefault(mapping, []).append(code)

    def readCaseFolding(self, file):
        """Populate the groups from an iterable of CaseFolding.txt lines.

        Text after '#' on a line is ignored. Only lines matched by
        commonAndSimpleLinesRE contribute a mapping; all others are skipped.
        Every code point in [0, MaxUnicode] that had no matching line is then
        recorded as mapping to itself.
        """
        codesSeen = set()
        for line in file:
            # Strip the trailing comment, then trailing whitespace.
            line = line.split('#', 1)[0].rstrip()
            if not line:
                continue

            fields = commonAndSimpleLinesRE.match(line)
            if not fields:
                continue

            code = int(fields.group('code'), 16)
            mapping = int(fields.group('mapping'), 16)

            codesSeen.add(code)
            self.addMapping(code, mapping)

        for i in range(MaxUnicode + 1):
            if i not in codesSeen:
                self.addMapping(i, i)

    def createTables(self, file):
        """Write the character-set and range tables as C++ source to *file*.

        Each code point gets a tag string of the form "<TypeName>:<value>":
          - a group of one code point: CanonicalizeUnique with value 0;
          - a group of more than two: CanonicalizeSet with the index of the
            group's character set;
          - a group of two adjacent code points: CanonicalizeAlternatingAligned
            or CanonicalizeAlternatingUnaligned (by parity of the lower one)
            with value 0;
          - any other group of two: CanonicalizeRangeLo / CanonicalizeRangeHi
            with the distance between the two code points.
        Runs of consecutive code points with identical tags are merged into
        one range entry.

        Expects readCaseFolding() to have run, so that every code point in
        [0, MaxUnicode] belongs to a group.
        """
        typeInfo = [""] * (MaxUnicode + 1)
        characterSets = []

        for mapping in sorted(self.canonicalGroups):
            characters = self.canonicalGroups[mapping]
            if len(characters) == 1:
                typeInfo[characters[0]] = "CanonicalizeUnique:0"
                continue

            characters.sort()
            if len(characters) > 2:
                # The set index is fixed before the set is appended, so every
                # member refers to the slot this set is about to occupy.
                setTag = "CanonicalizeSet:%d" % len(characterSets)
                for ch in characters:
                    typeInfo[ch] = setTag
                characterSets.append(characters)
                continue

            low = characters[0]
            high = characters[1]
            delta = high - low
            if delta == 1:
                pairTag = "CanonicalizeAlternatingUnaligned:0" if low & 1 else "CanonicalizeAlternatingAligned:0"
                typeInfo[low] = pairTag
                typeInfo[high] = pairTag
            else:
                typeInfo[low] = "CanonicalizeRangeLo:%d" % delta
                typeInfo[high] = "CanonicalizeRangeHi:%d" % delta

        # Merge runs of consecutive code points that share the same tag.
        rangeInfo = []
        end = 0
        while end <= MaxUnicode:
            begin = end
            runTag = typeInfo[end]
            while end < MaxUnicode and typeInfo[end + 1] == runTag:
                end = end + 1
            rangeInfo.append({"begin": begin, "end": end, "type": runTag})
            end = end + 1

        # One zero-terminated array per character set.
        for index, members in enumerate(characterSets):
            characters = "".join("0x{character:04x}, ".format(character=ch) for ch in members)
            file.write("const UChar32 unicodeCharacterSet{index:d}[] = {{ {characters}0 }};\n".format(index=index, characters=characters))

        file.write("\n")
        file.write("static constexpr size_t UNICODE_CANONICALIZATION_SETS = {setCount:d};\n".format(setCount=len(characterSets)))
        file.write("const UChar32* const unicodeCharacterSetInfo[UNICODE_CANONICALIZATION_SETS] = {\n")

        for setNumber in range(len(characterSets)):
            file.write(" unicodeCharacterSet{setNumber:d},\n".format(setNumber=setNumber))

        file.write("};\n")
        file.write("\n")
        file.write("const size_t UNICODE_CANONICALIZATION_RANGES = {rangeCount:d};\n".format(rangeCount=len(rangeInfo)))
        file.write("const CanonicalizationRange unicodeRangeInfo[UNICODE_CANONICALIZATION_RANGES] = {\n")

        for info in rangeInfo:
            # Tags are "<TypeName>:<value>"; split them back apart for output.
            typeAndValue = info["type"].split(":")
            file.write(" {{ 0x{begin:04x}, 0x{end:04x}, 0x{value:04x}, {type} }},\n".format(begin=info["begin"], end=info["end"], value=int(typeAndValue[1]), type=typeAndValue[0]))

        file.write("};\n")
        file.write("\n")
|
|
|
|
|
|
# Script entry point: read CaseFolding.txt (first argument) and write the
# generated canonicalization tables to the path given as the second argument.
if __name__ == "__main__":
    parser = optparse.OptionParser(usage = "usage: %prog <CaseFolding.txt> <YarrCanonicalizeUnicode.h>")
    (options, args) = parser.parse_args()

    if len(args) != 2:
        parser.error("<CaseFolding.txt> <YarrCanonicalizeUnicode.h>")

    caseFoldingTxtPath = args[0]
    canonicalizeHPath = args[1]

    canonicalize = Canonicalize()

    # Parse the input completely before touching the output, so a failure
    # while reading does not leave a freshly truncated output file behind.
    # The with-blocks close both files even if table generation raises.
    with openOrExit(caseFoldingTxtPath, "r") as caseFoldingTxtFile:
        canonicalize.readCaseFolding(caseFoldingTxtFile)

    with openOrExit(canonicalizeHPath, "w") as canonicalizeHFile:
        canonicalizeHFile.write(header)
        canonicalize.createTables(canonicalizeHFile)
        canonicalizeHFile.write(footer)

    sys.exit(0)
|