mirror of
https://github.com/darlinghq/darling-JavaScriptCore.git
synced 2025-04-16 22:09:58 +00:00
206 lines
7.8 KiB
Python
206 lines
7.8 KiB
Python
#!/usr/bin/env python
|
|
|
|
# Copyright (C) 2018 Andy VanWagoner (andy@vanwagoner.family)
|
|
#
|
|
# Redistribution and use in source and binary forms, with or without
|
|
# modification, are permitted provided that the following conditions
|
|
# are met:
|
|
#
|
|
# 1. Redistributions of source code must retain the above copyright
|
|
# notice, this list of conditions and the following disclaimer.
|
|
# 2. Redistributions in binary form must reproduce the above copyright
|
|
# notice, this list of conditions and the following disclaimer in the
|
|
# documentation and/or other materials provided with the distribution.
|
|
#
|
|
# THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
|
|
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
# DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
|
|
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
|
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
|
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
# This tool processes the IANA file language-subtag-registry.txt to create
|
|
# information required to canonicalize language tags according to ECMA 402 and
|
|
# RFC 5646 Section 4.5.
|
|
# https://www.iana.org/assignments/language-subtag-registry
|
|
# https://tc39.github.io/ecma402/#sec-canonicalizelanguagetag
|
|
# https://tools.ietf.org/html/rfc5646#section-4.5
|
|
|
|
import sys
|
|
import optparse
|
|
import os
|
|
|
|
# Banner written at the top of the generated C++ header. It embeds the path
# this script was invoked with (__file__) so readers can find the generator.
header = """// DO NOT EDIT! - This file was generated by """ + __file__ + """
"""

# Text written after the generated body; currently just a trailing newline.
footer = """
"""
|
|
|
|
|
|
def openOrExit(path, mode):
    """Open ``path`` with ``mode``, or report the failure and exit.

    On Python 3, text-mode files are opened as UTF-8; binary modes (those
    containing "b") are opened without an encoding, because ``open`` rejects
    an ``encoding`` argument in binary mode. On Python 2 the builtin ``open``
    is used unchanged.

    Args:
        path: Filesystem path of the file to open.
        mode: Mode string passed through to ``open`` (e.g. "r" or "w").

    Returns:
        The opened file object.

    Raises:
        SystemExit: With exit status 1 if the file cannot be opened. A
            diagnostic is written to stderr first.
    """
    try:
        if sys.version_info.major >= 3 and "b" not in mode:
            return open(path, mode, encoding="UTF-8")
        return open(path, mode)
    except IOError as e:
        # Diagnostics belong on stderr so they never mix with redirected
        # output, and sys.exit is used because the bare exit() helper is only
        # injected by the site module and is not guaranteed to exist.
        sys.stderr.write("I/O error opening {0}, ({1}): {2}\n".format(path, e.errno, e.strerror))
        sys.exit(1)
|
|
|
|
|
|
class SubtagRegistry:
    """Replacement tables extracted from a language-subtag-registry file.

    ``parse`` reads the registry record by record and ``process`` files each
    record into one of the maps below. ``dump`` then writes one C++ lookup
    function per map.

    Attributes:
        fileDate: Value of the registry's "File-Date" field, or None if no
            such field has been seen.
        languageMap: language subtag -> preferred value.
        extlangMap: extlang subtag -> its "Prefix" field.
        regionMap: region subtag -> preferred value.
        redundantMap: redundant tag (or "<prefix>-<variant>") -> preferred
            value.
        grandfatheredMap: lower-cased grandfathered tag -> preferred value,
            or the tag itself when the record has no preferred value.
    """

    def __init__(self):
        # Initialised here so dump() can test it even when the input had no
        # File-Date record.
        self.fileDate = None
        self.languageMap = {}
        self.extlangMap = {}
        self.regionMap = {}
        self.redundantMap = {}
        self.grandfatheredMap = {}

    def parse(self, file):
        """Read registry records from ``file`` and process each one.

        Records are separated by lines consisting of "%%". Each field is a
        "Name: value" line; a line that starts with whitespace (or that has
        no colon) continues the value of the preceding field and is appended
        to it, separated by a single space. Blank lines and lines starting
        with "#" are ignored, as is a continuation with no preceding field.

        Args:
            file: Iterable of text lines (typically an open text file).
        """
        record = {}
        lastKey = None
        for rawLine in file:
            line = rawLine.strip()
            if not line or line.startswith("#"):
                continue

            if line == "%%":
                self.process(record)
                record = {}
                lastKey = None
                continue

            # Folded lines begin with whitespace; they may legitimately
            # contain a colon, so check the fold marker before splitting.
            isContinuation = lastKey is not None and rawLine[0] in " \t"
            if not isContinuation and ":" in line:
                key, value = line.split(":", 1)
                lastKey = key.strip()
                record[lastKey] = value.strip()
            elif lastKey is not None:
                # Description often continues on the next line
                record[lastKey] += " " + line
        # The final record is not followed by a "%%" separator.
        self.process(record)

    def process(self, record):
        """File one parsed record into the appropriate map.

        Args:
            record: Dict of field name -> value for a single registry record.

        Raises:
            KeyError: If a record of a handled type lacks a field that type
                requires ("Subtag", "Tag" or "Prefix").
        """
        if "File-Date" in record:
            self.fileDate = record["File-Date"]

        if "Type" not in record:
            return

        recordType = record["Type"]
        preferred = record.get("Preferred-Value")
        if recordType == "language" and preferred:
            self.languageMap[record["Subtag"]] = preferred
        elif recordType == "extlang":
            self.extlangMap[record["Subtag"]] = record["Prefix"]
        elif recordType == "region" and preferred:
            self.regionMap[record["Subtag"]] = preferred
        elif recordType == "redundant" and preferred:
            # Skip tags of the form "<extlang prefix>-<preferred>": the
            # extlang map recorded above already describes that pairing.
            lang = self.extlangMap.get(preferred)
            if "{}-{}".format(lang, preferred) != record["Tag"]:
                self.redundantMap[record["Tag"]] = preferred
        elif recordType == "variant" and preferred:
            key = "{}-{}".format(record['Prefix'], record['Subtag'])
            # NOTE(review): "alalc97" is special-cased to a full tag here;
            # presumably because the replacement is only valid with the
            # ja-Latn prefix -- confirm against the registry.
            if preferred == "alalc97":
                preferred = "ja-Latn-alalc97"
            self.redundantMap[key] = preferred
        elif recordType == "grandfathered":
            key = record["Tag"].lower()
            value = record.get("Preferred-Value", key)
            self.grandfatheredMap[key] = value

    def dump(self, file):
        """Write the generated C++ body (pragma, guards, lookups) to ``file``.

        Args:
            file: Writable text file object.
        """
        if self.fileDate:
            file.write("// language-subtag-registry file date: {}\n".format(self.fileDate))
        file.write("\n#pragma once\n")
        file.write("\n#if ENABLE(INTL)\n")
        file.write("\nnamespace JSC {\n")
        self.dumpLookup(file, "intlPreferredLanguageTag", self.languageMap)
        self.dumpLookup(file, "intlPreferredExtlangTag", self.extlangMap)
        self.dumpLookup(file, "intlPreferredRegionTag", self.regionMap)
        self.dumpLookup(file, "intlRedundantLanguageTag", self.redundantMap)
        self.dumpLookup(file, "intlGrandfatheredLanguageTag", self.grandfatheredMap)
        file.write("\n} // namespace JSC\n")
        file.write("\n#endif // ENABLE(INTL)\n")

    def dumpLookup(self, file, name, map):
        """Write one C++ function ``name`` that looks a tag up in ``map``.

        Args:
            file: Writable text file object.
            name: Name of the generated C++ function.
            map: Dict of tag -> replacement to encode in the function.
        """
        file.write("\nstatic String {}(const String& tag)\n{{\n".format(name))
        file.write(" // {} possible replacements\n".format(len(map)))
        # We could pick the lookup implementation per map if desired
        # (dumpHashLookup and dumpSwitchLookup are the unused alternatives).
        # Anecdotal perf: if > switch > hash (slowest)
        # Code complexity: switch > if > hash (least complex)
        # Algo complexity: if = O(N) > switch = O(log N) > hash = O(1) (least complex)
        self.dumpIfLookup(file, name, map)
        file.write("}\n")

    def dumpHashLookup(self, file, name, map):
        """Write a function body that looks ``tag`` up in a static HashMap.

        Args:
            file: Writable text file object.
            name: Unused; kept so all dump*Lookup methods share a signature.
            map: Dict of tag -> replacement.
        """
        file.write(" static NeverDestroyed<HashMap<String, String>> cache;\n")
        file.write(" HashMap<String, String>& map = cache.get();\n")
        file.write(" if (UNLIKELY(map.isEmpty())) {\n")
        # Sorted so the generated file is stable across runs.
        entries = sorted(" map.add(\"{}\"_s, \"{}\"_s);\n".format(k, v) for k, v in map.items())
        file.write("".join(entries))
        file.write(" }\n")
        file.write(" return map.get(tag);\n")

    def dumpIfLookup(self, file, name, map):
        """Write a function body made of one ``if (tag == ...)`` per entry.

        Args:
            file: Writable text file object.
            name: Unused; kept so all dump*Lookup methods share a signature.
            map: Dict of tag -> replacement.
        """
        # Sorted so the generated file is stable across runs.
        entries = sorted(" if (tag == \"{}\")\n return \"{}\"_s;".format(k, v) for k, v in map.items())
        file.write("\n".join(entries))
        file.write("\n return String();\n")

    def dumpSwitchLookup(self, file, name, map):
        """Write a function body that matches ``tag`` with nested switches.

        The keys are first arranged into a character trie: each node is a
        dict keyed by single characters, and a node that ends a key stores
        the replacement under the multi-character key "value" (which cannot
        collide with a single-character edge).

        Args:
            file: Writable text file object.
            name: Unused; kept so all dump*Lookup methods share a signature.
            map: Dict of tag -> replacement.
        """
        tree = {}
        for k, v in map.items():
            node = tree
            for char in k:
                node = node.setdefault(char, {})
            node["value"] = v
        self.dumpSwitchLookupTree(file, tree, 0)
        file.write("\n return String();\n")

    def dumpSwitchLookupTree(self, file, tree, level):
        """Write the switch statement for one trie node and its children.

        Args:
            file: Writable text file object.
            tree: Trie node as built by ``dumpSwitchLookup``.
            level: Depth of this node, i.e. the index into ``tag`` that this
                node's switch tests; also controls the output indentation.
        """
        indent = "".ljust((level + 1) * 4)
        if "value" in tree:
            file.write(indent + "if (tag.length() == {})\n".format(level))
            file.write(indent + " return \"{}\"_s;\n".format(tree["value"]))
        # sorted() works on both Python 2 lists and Python 3 dict views, and
        # filtering (rather than deleting "value") leaves the trie untouched.
        keys = sorted(k for k in tree if k != "value")
        if not keys:
            return
        file.write(indent + "switch (tag[{}]) {{\n".format(level))
        for key in keys:
            file.write(indent + "case {}:\n".format(ord(key)))
            self.dumpSwitchLookupTree(file, tree[key], level + 1)
            file.write(indent + " break;\n")
        file.write(indent + "default: break;\n")
        file.write(indent + "}\n")
|
|
|
|
|
|
if __name__ == "__main__":
    # Usage: <script> <language-subtag-registry.txt> <IntlCanonicalizeLanguage.h>
    # Reads the registry from the first path and writes the generated C++
    # header to the second.
    parser = optparse.OptionParser(usage="usage: %prog <language-subtag-registry.txt> <IntlCanonicalizeLanguage.h>")
    (options, args) = parser.parse_args()

    if len(args) != 2:
        parser.error("<language-subtag-registry.txt> <IntlCanonicalizeLanguage.h>")

    registryPath = args[0]
    intlCanonHPath = args[1]

    # Parse the whole registry before touching the output path, so a failure
    # while reading does not leave a truncated header behind.
    registry = SubtagRegistry()
    with openOrExit(registryPath, "r") as registryFile:
        registry.parse(registryFile)

    # The with-block guarantees the generated header is flushed and closed
    # before the process exits.
    with openOrExit(intlCanonHPath, "w") as intlCanonHFile:
        intlCanonHFile.write(header)
        registry.dump(intlCanonHFile)
        intlCanonHFile.write(footer)

    sys.exit(0)
|