adjust h2json and json2h scripts to be comment agnostic (#12532)

Co-authored-by: Michael Burgardt <michael.burgardt@rwth-aachen.de>
2024-11-23 07:59:42 +00:00 · 2021-06-16 17:21:57 +02:00 · 2021-06-16 17:21:57 +02:00 · 10f0073778
commit 10f0073778
parent 8b6e58eebd
2 changed files with 134 additions and 92 deletions
--- a/intl/h2json.py
+++ b/intl/h2json.py
@@ -8,43 +8,62 @@ import sys
 import json
 try:
-   h_filename = sys.argv[1]
+    h_filename = sys.argv[1]
-   json_filename = h_filename.replace('.h', '.json')
+    json_filename = h_filename.replace('.h', '.json')
 except IndexError:
-   print("Usage: ./h2json.py msg_has_us.h")
+    print("Usage: ./h2json.py msg_has_us.h")
-   sys.exit(1)
+    sys.exit(1)
 if h_filename == 'msg_hash_lbl.h':
-   print("Skip")
+    print("Skip")
-   sys.exit(0)
+    sys.exit(0)
 p = re.compile(
    r'MSG_HASH\s*(?:\/\*(?:.|[\r\n])*?\*\/\s*)*\(\s*(?:\/\*(?:.|[\r\n])*?\*\/\s*)*[a-zA-Z0-9_]+\s*(?:\/\*(?:.|[\r\n])*?\*\/\s*)*,\s*(?:\/\*(?:.|[\r\n])*?\*\/\s*)*\".*\"\s*(?:\/\*(?:.|[\r\n])*?\*\/\s*)*\)')
 p = re.compile(r'MSG_HASH\(\s*\/?\*?.*\*?\/?\s*[a-zA-Z0-9_]+\s*,\s*\".*\"\s*\)')
 def parse_message(message):
-   key_start = max(message.find('(') + 1, message.find('*/') + 2)
+    a = message.find('/*')
-   key_end = message.find(',', key_start)
+    b = message.find('*/')
-   key = message[key_start:key_end].strip()
+    c = message.find('"')
-   value_start = message.find('"') + 1
+    new_msg = message
-   value_end = message.rfind('"')
+    while (a >= 0 and b >= 0) and (a < c < b or b < c):
-   value = message[value_start:value_end]
+        new_msg = new_msg[:a] + new_msg[b + 2:]
-   return key, value
+        c = new_msg.find('"', a)
        b = new_msg.find('*/', a)
        a = new_msg.find('/*', a)
    # get key word
    word = new_msg[new_msg.find('(') + 1:new_msg.find(',')].strip()
    a = new_msg.rfind('/*')
    b = new_msg.rfind('*/')
    d = new_msg.rfind('"')
    while (a >= 0 and b >= 0) and (a < d < b or a > d):
        new_msg = new_msg[:a]
        a = new_msg.rfind('/*')
        b = new_msg.rfind('*/')
        d = new_msg.rfind('"')
    # get value
    value = new_msg[c + 1:d]
    return word, value
 try:
-   with open(h_filename, 'r+') as h_file:
+    with open(h_filename, 'r+', encoding='utf-8') as h_file:
-      text = h_file.read()
+        text = h_file.read()
-      result = p.findall(text)
+        result = p.findall(text)
-      seen = set()
+        seen = set()
-      messages = {}
+        messages = {}
-      for msg in result:
+        for msg in result:
-         key, val = parse_message(msg)
+            key, val = parse_message(msg)
-         if not key.startswith('MENU_ENUM_LABEL_VALUE_LANG_') and val:
+            if not key.startswith('MENU_ENUM_LABEL_VALUE_LANG_') and val:
-            messages[key] = val.replace('\\\"', '"') # unescape
+                messages[key] = val.replace('\\\"', '"')  # unescape
-            if key not in seen:
+                if key not in seen:
-               seen.add(key)
+                    seen.add(key)
-            else:
+                else:
-               print("Duplicate key: " + key)
+                    print("Duplicate key: " + key)
-      with open(json_filename, 'w') as json_file:
+        with open(json_filename, 'w', encoding='utf-8') as json_file:
-         json.dump(messages, json_file, indent=2)
+            json.dump(messages, json_file, indent=2)
 except EnvironmentError:
-   print('Cannot read/write ' + h_filename)
+    print('Cannot read/write ' + h_filename)
--- a/intl/json2h.py
+++ b/intl/json2h.py
@@ -8,81 +8,104 @@ import sys
 import json
 try:
-   json_filename = sys.argv[1]
+    json_filename = sys.argv[1]
-   h_filename = json_filename.replace('.json', '.h')
+    h_filename = json_filename.replace('.json', '.h')
 except IndexError:
-   print("Usage: ./template.py <language_postfix>")
+    print("Usage: ./template.py <language_postfix>")
-   sys.exit(1)
+    sys.exit(1)
 if json_filename == 'msg_hash_us.json' or json_filename == 'msg_hash_lbl.json':
-   print("Skip")
+    print("Skip")
-   sys.exit(0)
+    sys.exit(0)
 p = re.compile(
    r'MSG_HASH\s*(?:\/\*(?:.|[\r\n])*?\*\/\s*)*\(\s*(?:\/\*(?:.|[\r\n])*?\*\/\s*)*[a-zA-Z0-9_]+\s*(?:\/\*(?:.|[\r\n])*?\*\/\s*)*,\s*(?:\/\*(?:.|[\r\n])*?\*\/\s*)*\".*\"\s*(?:\/\*(?:.|[\r\n])*?\*\/\s*)*\)')
 p = re.compile(r'MSG_HASH\(\s*\/?\*?.*\*?\/?\s*[a-zA-Z0-9_]+\s*,\s*\".*\"\s*\)')
 def c89_cut(old_str):
-   new_str = ''
+    new_str = ''
-   byte_count = 0
+    byte_count = 0
-   for c in old_str:
+    for c in old_str:
-      byte_count += len(c.encode('utf-8'))
+        byte_count += len(c.encode('utf-8'))
-      if byte_count > 500:
+        if byte_count > 500:
-         return new_str + '[...]'
+            return new_str + '[...]'
-      new_str += c
+        new_str += c
-   return new_str
+    return new_str
 def parse_message(message):
-   key_start = max(message.find('(') + 1, message.find('*/') + 2)
+    # remove all comments before the value (= the string)
-   key_end = message.find(',')
+    a = message.find('/*')
-   key = message[key_start:key_end].strip()
+    b = message.find('*/')
-   value_start = message.find('"') + 1
+    c = message.find('"')
-   value_end = message.rfind('"')
+    new_msg = message
-   value = message[value_start:value_end]
+    while (a >= 0 and b >= 0) and (a < c < b or b < c):
-   return key, value
+        new_msg = new_msg[:a] + new_msg[b + 2:]
        c = new_msg.find('"', a)
        b = new_msg.find('*/', a)
        a = new_msg.find('/*', a)
    # get key word
    word = new_msg[new_msg.find('(') + 1:new_msg.find(',')].strip()
    # remove all comments after the value (= the string)
    a = new_msg.rfind('/*')
    b = new_msg.rfind('*/')
    d = new_msg.rfind('"')
    while (a >= 0 and b >= 0) and (a < d < b or a > d):
        new_msg = new_msg[:a]
        a = new_msg.rfind('/*')
        b = new_msg.rfind('*/')
        d = new_msg.rfind('"')
    # get value
    value = new_msg[c + 1:d]
    return word, value
 def parse_messages(text):
-   result = p.findall(text)
+    result = p.findall(text)
-   seen = set()
+    seen = set()
-   msg_list = []
+    msg_list = []
-   for msg in result:
+    for msg in result:
-      key, val = parse_message(msg)
+        key, val = parse_message(msg)
-      item = {'key': key, 'val': val, 'msg': msg}
+        item = {'key': key, 'val': val, 'msg': msg}
-      msg_list.append(item)
+        msg_list.append(item)
-      if key not in seen:
+        if key not in seen:
-         seen.add(key)
+            seen.add(key)
-      else:
+        else:
-         print("Duplicate key: " + key)
+            print("Duplicate key: " + key)
-   return msg_list
+
    return msg_list
 def update(messages, template, source_messages):
-   new_translation = template
+    translation = template
-   template_messages = parse_messages(template)
+    template_messages = parse_messages(template)
-   for tp_msg in template_messages:
+    for tp_msg in template_messages:
-      old_msg = tp_msg['msg']
+        old_msg = tp_msg['msg']
-      if tp_msg['key'] in messages and messages[tp_msg['key']] != source_messages[tp_msg['key']]:
+        if tp_msg['key'] in messages and messages[tp_msg['key']] != source_messages[tp_msg['key']]:
-         tp_msg_val = tp_msg['val']
+            tp_msg_val = tp_msg['val']
-         tl_msg_val = messages[tp_msg['key']]
+            tl_msg_val = messages[tp_msg['key']]
-         tl_msg_val = tl_msg_val.replace('"', '\\\"').replace('\n', '') # escape
+            tl_msg_val = tl_msg_val.replace('"', '\\\"').replace('\n', '')  # escape
-         if tp_msg['key'].find('_QT_') < 0:
+            if tp_msg['key'].find('_QT_') < 0:
-            tl_msg_val = c89_cut(tl_msg_val)
+                tl_msg_val = c89_cut(tl_msg_val)
-         # Replace last match, incase the key contains the value string
+            # Replace last match, in case the key contains the value string
-         new_msg = old_msg[::-1].replace(tp_msg_val[::-1], tl_msg_val[::-1], 1)[::-1]
+            new_msg = old_msg[::-1].replace(tp_msg_val[::-1], tl_msg_val[::-1], 1)[::-1]
-         new_translation = new_translation.replace(old_msg, new_msg)
+            translation = translation.replace(old_msg, new_msg)
-      # Remove English duplicates and non-translateable strings
+        # Remove English duplicates and non-translatable strings
-      else:
+        else:
-         new_translation = new_translation.replace(old_msg + '\n', '')
+            translation = translation.replace(old_msg + '\n', '')
-   return new_translation
+    return translation
-with open('msg_hash_us.h', 'r') as template_file:
+with open('msg_hash_us.h', 'r', encoding='utf-8') as template_file:
-   template = template_file.read()
+    template = template_file.read()
-   with open('msg_hash_us.json', 'r+', encoding='utf-8') as source_json_file:
+    with open('msg_hash_us.json', 'r+', encoding='utf-8') as source_json_file:
-      source_messages = json.load(source_json_file)
+        source_messages = json.load(source_json_file)
-      with open(json_filename, 'r+', encoding='utf-8') as json_file:
+        with open(json_filename, 'r+', encoding='utf-8') as json_file:
-         messages = json.load(json_file)
+            messages = json.load(json_file)
-         new_translation = update(messages, template, source_messages)
+            new_translation = update(messages, template, source_messages)
-         with open(h_filename, 'w', encoding='utf-8') as h_file:
+            with open(h_filename, 'w', encoding='utf-8') as h_file:
-            h_file.seek(0)
+                h_file.seek(0)
-            h_file.write(new_translation)
+                h_file.write(new_translation)
-            h_file.truncate()
+                h_file.truncate()