Adjust h2json and json2h scripts to be comment-agnostic (#12532)

Co-authored-by: Michael Burgardt <michael.burgardt@rwth-aachen.de>
This commit is contained in:
Michael Burgardt 2021-06-16 17:21:57 +02:00 committed by GitHub
parent 8b6e58eebd
commit 10f0073778
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 134 additions and 92 deletions

View File

@ -8,43 +8,62 @@ import sys
import json import json
try: try:
h_filename = sys.argv[1] h_filename = sys.argv[1]
json_filename = h_filename.replace('.h', '.json') json_filename = h_filename.replace('.h', '.json')
except IndexError: except IndexError:
print("Usage: ./h2json.py msg_has_us.h") print("Usage: ./h2json.py msg_has_us.h")
sys.exit(1) sys.exit(1)
if h_filename == 'msg_hash_lbl.h': if h_filename == 'msg_hash_lbl.h':
print("Skip") print("Skip")
sys.exit(0) sys.exit(0)
p = re.compile(
r'MSG_HASH\s*(?:\/\*(?:.|[\r\n])*?\*\/\s*)*\(\s*(?:\/\*(?:.|[\r\n])*?\*\/\s*)*[a-zA-Z0-9_]+\s*(?:\/\*(?:.|[\r\n])*?\*\/\s*)*,\s*(?:\/\*(?:.|[\r\n])*?\*\/\s*)*\".*\"\s*(?:\/\*(?:.|[\r\n])*?\*\/\s*)*\)')
p = re.compile(r'MSG_HASH\(\s*\/?\*?.*\*?\/?\s*[a-zA-Z0-9_]+\s*,\s*\".*\"\s*\)')
def parse_message(message): def parse_message(message):
key_start = max(message.find('(') + 1, message.find('*/') + 2) a = message.find('/*')
key_end = message.find(',', key_start) b = message.find('*/')
key = message[key_start:key_end].strip() c = message.find('"')
value_start = message.find('"') + 1 new_msg = message
value_end = message.rfind('"') while (a >= 0 and b >= 0) and (a < c < b or b < c):
value = message[value_start:value_end] new_msg = new_msg[:a] + new_msg[b + 2:]
return key, value c = new_msg.find('"', a)
b = new_msg.find('*/', a)
a = new_msg.find('/*', a)
# get key word
word = new_msg[new_msg.find('(') + 1:new_msg.find(',')].strip()
a = new_msg.rfind('/*')
b = new_msg.rfind('*/')
d = new_msg.rfind('"')
while (a >= 0 and b >= 0) and (a < d < b or a > d):
new_msg = new_msg[:a]
a = new_msg.rfind('/*')
b = new_msg.rfind('*/')
d = new_msg.rfind('"')
# get value
value = new_msg[c + 1:d]
return word, value
try: try:
with open(h_filename, 'r+') as h_file: with open(h_filename, 'r+', encoding='utf-8') as h_file:
text = h_file.read() text = h_file.read()
result = p.findall(text) result = p.findall(text)
seen = set() seen = set()
messages = {} messages = {}
for msg in result: for msg in result:
key, val = parse_message(msg) key, val = parse_message(msg)
if not key.startswith('MENU_ENUM_LABEL_VALUE_LANG_') and val: if not key.startswith('MENU_ENUM_LABEL_VALUE_LANG_') and val:
messages[key] = val.replace('\\\"', '"') # unescape messages[key] = val.replace('\\\"', '"') # unescape
if key not in seen: if key not in seen:
seen.add(key) seen.add(key)
else: else:
print("Duplicate key: " + key) print("Duplicate key: " + key)
with open(json_filename, 'w') as json_file: with open(json_filename, 'w', encoding='utf-8') as json_file:
json.dump(messages, json_file, indent=2) json.dump(messages, json_file, indent=2)
except EnvironmentError: except EnvironmentError:
print('Cannot read/write ' + h_filename) print('Cannot read/write ' + h_filename)

View File

@ -8,81 +8,104 @@ import sys
import json import json
try: try:
json_filename = sys.argv[1] json_filename = sys.argv[1]
h_filename = json_filename.replace('.json', '.h') h_filename = json_filename.replace('.json', '.h')
except IndexError: except IndexError:
print("Usage: ./template.py <language_postfix>") print("Usage: ./template.py <language_postfix>")
sys.exit(1) sys.exit(1)
if json_filename == 'msg_hash_us.json' or json_filename == 'msg_hash_lbl.json': if json_filename == 'msg_hash_us.json' or json_filename == 'msg_hash_lbl.json':
print("Skip") print("Skip")
sys.exit(0) sys.exit(0)
p = re.compile(
r'MSG_HASH\s*(?:\/\*(?:.|[\r\n])*?\*\/\s*)*\(\s*(?:\/\*(?:.|[\r\n])*?\*\/\s*)*[a-zA-Z0-9_]+\s*(?:\/\*(?:.|[\r\n])*?\*\/\s*)*,\s*(?:\/\*(?:.|[\r\n])*?\*\/\s*)*\".*\"\s*(?:\/\*(?:.|[\r\n])*?\*\/\s*)*\)')
p = re.compile(r'MSG_HASH\(\s*\/?\*?.*\*?\/?\s*[a-zA-Z0-9_]+\s*,\s*\".*\"\s*\)')
def c89_cut(old_str): def c89_cut(old_str):
new_str = '' new_str = ''
byte_count = 0 byte_count = 0
for c in old_str: for c in old_str:
byte_count += len(c.encode('utf-8')) byte_count += len(c.encode('utf-8'))
if byte_count > 500: if byte_count > 500:
return new_str + '[...]' return new_str + '[...]'
new_str += c new_str += c
return new_str return new_str
def parse_message(message): def parse_message(message):
key_start = max(message.find('(') + 1, message.find('*/') + 2) # remove all comments before the value (= the string)
key_end = message.find(',') a = message.find('/*')
key = message[key_start:key_end].strip() b = message.find('*/')
value_start = message.find('"') + 1 c = message.find('"')
value_end = message.rfind('"') new_msg = message
value = message[value_start:value_end] while (a >= 0 and b >= 0) and (a < c < b or b < c):
return key, value new_msg = new_msg[:a] + new_msg[b + 2:]
c = new_msg.find('"', a)
b = new_msg.find('*/', a)
a = new_msg.find('/*', a)
# get key word
word = new_msg[new_msg.find('(') + 1:new_msg.find(',')].strip()
# remove all comments after the value (= the string)
a = new_msg.rfind('/*')
b = new_msg.rfind('*/')
d = new_msg.rfind('"')
while (a >= 0 and b >= 0) and (a < d < b or a > d):
new_msg = new_msg[:a]
a = new_msg.rfind('/*')
b = new_msg.rfind('*/')
d = new_msg.rfind('"')
# get value
value = new_msg[c + 1:d]
return word, value
def parse_messages(text): def parse_messages(text):
result = p.findall(text) result = p.findall(text)
seen = set() seen = set()
msg_list = [] msg_list = []
for msg in result: for msg in result:
key, val = parse_message(msg) key, val = parse_message(msg)
item = {'key': key, 'val': val, 'msg': msg} item = {'key': key, 'val': val, 'msg': msg}
msg_list.append(item) msg_list.append(item)
if key not in seen: if key not in seen:
seen.add(key) seen.add(key)
else: else:
print("Duplicate key: " + key) print("Duplicate key: " + key)
return msg_list
return msg_list
def update(messages, template, source_messages): def update(messages, template, source_messages):
new_translation = template translation = template
template_messages = parse_messages(template) template_messages = parse_messages(template)
for tp_msg in template_messages: for tp_msg in template_messages:
old_msg = tp_msg['msg'] old_msg = tp_msg['msg']
if tp_msg['key'] in messages and messages[tp_msg['key']] != source_messages[tp_msg['key']]: if tp_msg['key'] in messages and messages[tp_msg['key']] != source_messages[tp_msg['key']]:
tp_msg_val = tp_msg['val'] tp_msg_val = tp_msg['val']
tl_msg_val = messages[tp_msg['key']] tl_msg_val = messages[tp_msg['key']]
tl_msg_val = tl_msg_val.replace('"', '\\\"').replace('\n', '') # escape tl_msg_val = tl_msg_val.replace('"', '\\\"').replace('\n', '') # escape
if tp_msg['key'].find('_QT_') < 0: if tp_msg['key'].find('_QT_') < 0:
tl_msg_val = c89_cut(tl_msg_val) tl_msg_val = c89_cut(tl_msg_val)
# Replace last match, incase the key contains the value string # Replace last match, in case the key contains the value string
new_msg = old_msg[::-1].replace(tp_msg_val[::-1], tl_msg_val[::-1], 1)[::-1] new_msg = old_msg[::-1].replace(tp_msg_val[::-1], tl_msg_val[::-1], 1)[::-1]
new_translation = new_translation.replace(old_msg, new_msg) translation = translation.replace(old_msg, new_msg)
# Remove English duplicates and non-translateable strings # Remove English duplicates and non-translatable strings
else: else:
new_translation = new_translation.replace(old_msg + '\n', '') translation = translation.replace(old_msg + '\n', '')
return new_translation return translation
with open('msg_hash_us.h', 'r') as template_file: with open('msg_hash_us.h', 'r', encoding='utf-8') as template_file:
template = template_file.read() template = template_file.read()
with open('msg_hash_us.json', 'r+', encoding='utf-8') as source_json_file: with open('msg_hash_us.json', 'r+', encoding='utf-8') as source_json_file:
source_messages = json.load(source_json_file) source_messages = json.load(source_json_file)
with open(json_filename, 'r+', encoding='utf-8') as json_file: with open(json_filename, 'r+', encoding='utf-8') as json_file:
messages = json.load(json_file) messages = json.load(json_file)
new_translation = update(messages, template, source_messages) new_translation = update(messages, template, source_messages)
with open(h_filename, 'w', encoding='utf-8') as h_file: with open(h_filename, 'w', encoding='utf-8') as h_file:
h_file.seek(0) h_file.seek(0)
h_file.write(new_translation) h_file.write(new_translation)
h_file.truncate() h_file.truncate()