From 10f00737784aac4ef59ec01fc53d33974516ebd8 Mon Sep 17 00:00:00 2001 From: Michael Burgardt Date: Wed, 16 Jun 2021 17:21:57 +0200 Subject: [PATCH] adjust h2json and json2h scripts to be comment agnostic (#12532) Co-authored-by: Michael Burgardt --- intl/h2json.py | 79 ++++++++++++++++---------- intl/json2h.py | 147 ++++++++++++++++++++++++++++--------------------- 2 files changed, 134 insertions(+), 92 deletions(-) diff --git a/intl/h2json.py b/intl/h2json.py index ca93bfd747..fa1a07d7e0 100755 --- a/intl/h2json.py +++ b/intl/h2json.py @@ -8,43 +8,62 @@ import sys import json try: - h_filename = sys.argv[1] - json_filename = h_filename.replace('.h', '.json') + h_filename = sys.argv[1] + json_filename = h_filename.replace('.h', '.json') except IndexError: - print("Usage: ./h2json.py msg_has_us.h") - sys.exit(1) + print("Usage: ./h2json.py msg_has_us.h") + sys.exit(1) if h_filename == 'msg_hash_lbl.h': - print("Skip") - sys.exit(0) + print("Skip") + sys.exit(0) + +p = re.compile( + r'MSG_HASH\s*(?:\/\*(?:.|[\r\n])*?\*\/\s*)*\(\s*(?:\/\*(?:.|[\r\n])*?\*\/\s*)*[a-zA-Z0-9_]+\s*(?:\/\*(?:.|[\r\n])*?\*\/\s*)*,\s*(?:\/\*(?:.|[\r\n])*?\*\/\s*)*\".*\"\s*(?:\/\*(?:.|[\r\n])*?\*\/\s*)*\)') -p = re.compile(r'MSG_HASH\(\s*\/?\*?.*\*?\/?\s*[a-zA-Z0-9_]+\s*,\s*\".*\"\s*\)') def parse_message(message): - key_start = max(message.find('(') + 1, message.find('*/') + 2) - key_end = message.find(',', key_start) - key = message[key_start:key_end].strip() - value_start = message.find('"') + 1 - value_end = message.rfind('"') - value = message[value_start:value_end] - return key, value + a = message.find('/*') + b = message.find('*/') + c = message.find('"') + new_msg = message + while (a >= 0 and b >= 0) and (a < c < b or b < c): + new_msg = new_msg[:a] + new_msg[b + 2:] + c = new_msg.find('"', a) + b = new_msg.find('*/', a) + a = new_msg.find('/*', a) + # get key word + word = new_msg[new_msg.find('(') + 1:new_msg.find(',')].strip() + + a = new_msg.rfind('/*') + b = new_msg.rfind('*/') + d = new_msg.rfind('"') + while (a >= 0 and b >= 0) and (a < d < b or a > d): + new_msg = new_msg[:a] + a = new_msg.rfind('/*') + b = new_msg.rfind('*/') + d = new_msg.rfind('"') + # get value + value = new_msg[c + 1:d] + + return word, value try: - with open(h_filename, 'r+') as h_file: - text = h_file.read() - result = p.findall(text) - seen = set() - messages = {} - for msg in result: - key, val = parse_message(msg) - if not key.startswith('MENU_ENUM_LABEL_VALUE_LANG_') and val: - messages[key] = val.replace('\\\"', '"') # unescape - if key not in seen: - seen.add(key) - else: - print("Duplicate key: " + key) - with open(json_filename, 'w') as json_file: - json.dump(messages, json_file, indent=2) + with open(h_filename, 'r+', encoding='utf-8') as h_file: + text = h_file.read() + result = p.findall(text) + seen = set() + messages = {} + for msg in result: + key, val = parse_message(msg) + if not key.startswith('MENU_ENUM_LABEL_VALUE_LANG_') and val: + messages[key] = val.replace('\\\"', '"') # unescape + if key not in seen: + seen.add(key) + else: + print("Duplicate key: " + key) + with open(json_filename, 'w', encoding='utf-8') as json_file: + json.dump(messages, json_file, indent=2) except EnvironmentError: - print('Cannot read/write ' + h_filename) + print('Cannot read/write ' + h_filename) diff --git a/intl/json2h.py b/intl/json2h.py index d2db7fba20..b5d3465b17 100755 --- a/intl/json2h.py +++ b/intl/json2h.py @@ -8,81 +8,104 @@ import sys import json try: - json_filename = sys.argv[1] - h_filename = json_filename.replace('.json', '.h') + json_filename = sys.argv[1] + h_filename = json_filename.replace('.json', '.h') except IndexError: - print("Usage: ./template.py ") - sys.exit(1) + print("Usage: ./template.py ") + sys.exit(1) if json_filename == 'msg_hash_us.json' or json_filename == 'msg_hash_lbl.json': - print("Skip") - sys.exit(0) + print("Skip") + sys.exit(0) + +p = re.compile( + r'MSG_HASH\s*(?:\/\*(?:.|[\r\n])*?\*\/\s*)*\(\s*(?:\/\*(?:.|[\r\n])*?\*\/\s*)*[a-zA-Z0-9_]+\s*(?:\/\*(?:.|[\r\n])*?\*\/\s*)*,\s*(?:\/\*(?:.|[\r\n])*?\*\/\s*)*\".*\"\s*(?:\/\*(?:.|[\r\n])*?\*\/\s*)*\)') -p = re.compile(r'MSG_HASH\(\s*\/?\*?.*\*?\/?\s*[a-zA-Z0-9_]+\s*,\s*\".*\"\s*\)') def c89_cut(old_str): - new_str = '' - byte_count = 0 - for c in old_str: - byte_count += len(c.encode('utf-8')) - if byte_count > 500: - return new_str + '[...]' - new_str += c - return new_str + new_str = '' + byte_count = 0 + for c in old_str: + byte_count += len(c.encode('utf-8')) + if byte_count > 500: + return new_str + '[...]' + new_str += c + return new_str + def parse_message(message): - key_start = max(message.find('(') + 1, message.find('*/') + 2) - key_end = message.find(',') - key = message[key_start:key_end].strip() - value_start = message.find('"') + 1 - value_end = message.rfind('"') - value = message[value_start:value_end] - return key, value + # remove all comments before the value (= the string) + a = message.find('/*') + b = message.find('*/') + c = message.find('"') + new_msg = message + while (a >= 0 and b >= 0) and (a < c < b or b < c): + new_msg = new_msg[:a] + new_msg[b + 2:] + c = new_msg.find('"', a) + b = new_msg.find('*/', a) + a = new_msg.find('/*', a) + # get key word + word = new_msg[new_msg.find('(') + 1:new_msg.find(',')].strip() + + # remove all comments after the value (= the string) + a = new_msg.rfind('/*') + b = new_msg.rfind('*/') + d = new_msg.rfind('"') + while (a >= 0 and b >= 0) and (a < d < b or a > d): + new_msg = new_msg[:a] + a = new_msg.rfind('/*') + b = new_msg.rfind('*/') + d = new_msg.rfind('"') + # get value + value = new_msg[c + 1:d] + + return word, value def parse_messages(text): - result = p.findall(text) - seen = set() - msg_list = [] - for msg in result: - key, val = parse_message(msg) - item = {'key': key, 'val': val, 'msg': msg} - msg_list.append(item) - if key not in seen: - seen.add(key) - else: - print("Duplicate key: " + key) - return msg_list + result = p.findall(text) + seen = set() + msg_list = [] + for msg in result: + key, val = parse_message(msg) + item = {'key': key, 'val': val, 'msg': msg} + msg_list.append(item) + if key not in seen: + seen.add(key) + else: + print("Duplicate key: " + key) + + return msg_list def update(messages, template, source_messages): - new_translation = template - template_messages = parse_messages(template) - for tp_msg in template_messages: - old_msg = tp_msg['msg'] - if tp_msg['key'] in messages and messages[tp_msg['key']] != source_messages[tp_msg['key']]: - tp_msg_val = tp_msg['val'] - tl_msg_val = messages[tp_msg['key']] - tl_msg_val = tl_msg_val.replace('"', '\\\"').replace('\n', '') # escape - if tp_msg['key'].find('_QT_') < 0: - tl_msg_val = c89_cut(tl_msg_val) - # Replace last match, incase the key contains the value string - new_msg = old_msg[::-1].replace(tp_msg_val[::-1], tl_msg_val[::-1], 1)[::-1] - new_translation = new_translation.replace(old_msg, new_msg) - # Remove English duplicates and non-translateable strings - else: - new_translation = new_translation.replace(old_msg + '\n', '') - return new_translation + translation = template + template_messages = parse_messages(template) + for tp_msg in template_messages: + old_msg = tp_msg['msg'] + if tp_msg['key'] in messages and messages[tp_msg['key']] != source_messages[tp_msg['key']]: + tp_msg_val = tp_msg['val'] + tl_msg_val = messages[tp_msg['key']] + tl_msg_val = tl_msg_val.replace('"', '\\\"').replace('\n', '') # escape + if tp_msg['key'].find('_QT_') < 0: + tl_msg_val = c89_cut(tl_msg_val) + # Replace last match, in case the key contains the value string + new_msg = old_msg[::-1].replace(tp_msg_val[::-1], tl_msg_val[::-1], 1)[::-1] + translation = translation.replace(old_msg, new_msg) + # Remove English duplicates and non-translatable strings + else: + translation = translation.replace(old_msg + '\n', '') + return translation -with open('msg_hash_us.h', 'r') as template_file: - template = template_file.read() - with open('msg_hash_us.json', 'r+', encoding='utf-8') as source_json_file: - source_messages = json.load(source_json_file) - with open(json_filename, 'r+', encoding='utf-8') as json_file: - messages = json.load(json_file) - new_translation = update(messages, template, source_messages) - with open(h_filename, 'w', encoding='utf-8') as h_file: - h_file.seek(0) - h_file.write(new_translation) - h_file.truncate() +with open('msg_hash_us.h', 'r', encoding='utf-8') as template_file: + template = template_file.read() + with open('msg_hash_us.json', 'r+', encoding='utf-8') as source_json_file: + source_messages = json.load(source_json_file) + with open(json_filename, 'r+', encoding='utf-8') as json_file: + messages = json.load(json_file) + new_translation = update(messages, template, source_messages) + with open(h_filename, 'w', encoding='utf-8') as h_file: + h_file.seek(0) + h_file.write(new_translation) + h_file.truncate()