From 10f00737784aac4ef59ec01fc53d33974516ebd8 Mon Sep 17 00:00:00 2001
From: Michael Burgardt <michael.burgardt@gmail.com>
Date: Wed, 16 Jun 2021 17:21:57 +0200
Subject: [PATCH] adjust h2json and json2h scripts to be comment agnostic
 (#12532)

Co-authored-by: Michael Burgardt <michael.burgardt@rwth-aachen.de>
---
 intl/h2json.py |  79 ++++++++++++++++----------
 intl/json2h.py | 147 ++++++++++++++++++++++++++++---------------------
 2 files changed, 134 insertions(+), 92 deletions(-)

diff --git a/intl/h2json.py b/intl/h2json.py
index ca93bfd747..fa1a07d7e0 100755
--- a/intl/h2json.py
+++ b/intl/h2json.py
@@ -8,43 +8,62 @@ import sys
 import json
 
 try:
-   h_filename = sys.argv[1]
-   json_filename = h_filename.replace('.h', '.json')
+    h_filename = sys.argv[1]  # header to convert, taken from the first command-line argument
+    json_filename = h_filename.replace('.h', '.json')  # every '.h' in the name becomes '.json'
 except IndexError:
-   print("Usage: ./h2json.py msg_has_us.h")
-   sys.exit(1)
+    print("Usage: ./h2json.py msg_hash_us.h")  # no argument given: show usage and fail
+    sys.exit(1)
 
 if h_filename == 'msg_hash_lbl.h':
-   print("Skip")
-   sys.exit(0)
+    print("Skip")  # only an exact, path-less 'msg_hash_lbl.h' argument is skipped
+    sys.exit(0)  # skipping is reported as success
+
+p = re.compile(  # matches MSG_HASH(KEY, "text"), allowing /* ... */ comments between any two tokens
+    r'MSG_HASH\s*(?:\/\*(?:.|[\r\n])*?\*\/\s*)*\(\s*(?:\/\*(?:.|[\r\n])*?\*\/\s*)*[a-zA-Z0-9_]+\s*(?:\/\*(?:.|[\r\n])*?\*\/\s*)*,\s*(?:\/\*(?:.|[\r\n])*?\*\/\s*)*\".*\"\s*(?:\/\*(?:.|[\r\n])*?\*\/\s*)*\)')
 
-p = re.compile(r'MSG_HASH\(\s*\/?\*?.*\*?\/?\s*[a-zA-Z0-9_]+\s*,\s*\".*\"\s*\)')
 
 def parse_message(message):
-   key_start = max(message.find('(') + 1, message.find('*/') + 2)
-   key_end = message.find(',', key_start)
-   key = message[key_start:key_end].strip()
-   value_start = message.find('"') + 1
-   value_end = message.rfind('"')
-   value = message[value_start:value_end]
-   return key, value
+    a = message.find('/*')  # start of the first comment, -1 if there is none
+    b = message.find('*/')  # end marker of the first comment, -1 if there is none
+    c = message.find('"')  # first double quote in the message
+    new_msg = message
+    while (a >= 0 and b >= 0) and (a < c < b or b < c):  # comment contains or precedes the first quote
+        new_msg = new_msg[:a] + new_msg[b + 2:]  # drop that comment, markers included
+        c = new_msg.find('"', a)  # re-locate quote and next comment from the cut position
+        b = new_msg.find('*/', a)
+        a = new_msg.find('/*', a)
+    # the key is the stripped text between the first '(' and the first ','
+    word = new_msg[new_msg.find('(') + 1:new_msg.find(',')].strip()
+
+    a = new_msg.rfind('/*')  # last comment start
+    b = new_msg.rfind('*/')  # last comment end marker
+    d = new_msg.rfind('"')  # last double quote in the message
+    while (a >= 0 and b >= 0) and (a < d < b or a > d):  # last comment contains or follows the last quote
+        new_msg = new_msg[:a]  # cut everything from that comment onwards
+        a = new_msg.rfind('/*')
+        b = new_msg.rfind('*/')
+        d = new_msg.rfind('"')
+    # the value is the text strictly between quote c and quote d
+    value = new_msg[c + 1:d]
+
+    return word, value
 
 
 try:
-   with open(h_filename, 'r+') as h_file:
-      text = h_file.read()
-      result = p.findall(text)
-      seen = set()
-      messages = {}
-      for msg in result:
-         key, val = parse_message(msg)
-         if not key.startswith('MENU_ENUM_LABEL_VALUE_LANG_') and val:
-            messages[key] = val.replace('\\\"', '"') # unescape
-            if key not in seen:
-               seen.add(key)
-            else:
-               print("Duplicate key: " + key)
-      with open(json_filename, 'w') as json_file:
-         json.dump(messages, json_file, indent=2)
+    with open(h_filename, 'r', encoding='utf-8') as h_file:  # read-only: the header is never modified
+        text = h_file.read()
+        result = p.findall(text)  # every MSG_HASH(...) entry found in the header, as full match text
+        seen = set()  # keys stored so far, used only to report duplicates
+        messages = {}  # key -> unescaped string, dumped as JSON below
+        for msg in result:
+            key, val = parse_message(msg)
+            if not key.startswith('MENU_ENUM_LABEL_VALUE_LANG_') and val:  # skip LANG_ keys and empty strings
+                messages[key] = val.replace('\\\"', '"')  # unescape
+                if key not in seen:
+                    seen.add(key)
+                else:
+                    print("Duplicate key: " + key)  # the later value has already replaced the earlier one
+        with open(json_filename, 'w', encoding='utf-8') as json_file:
+            json.dump(messages, json_file, indent=2)
 except EnvironmentError:
-   print('Cannot read/write ' + h_filename)
+    print('Cannot read/write ' + h_filename)  # reported for failures on either file
diff --git a/intl/json2h.py b/intl/json2h.py
index d2db7fba20..b5d3465b17 100755
--- a/intl/json2h.py
+++ b/intl/json2h.py
@@ -8,81 +8,104 @@ import sys
 import json
 
 try:
-   json_filename = sys.argv[1]
-   h_filename = json_filename.replace('.json', '.h')
+    json_filename = sys.argv[1]  # translation JSON to convert, taken from the first command-line argument
+    h_filename = json_filename.replace('.json', '.h')  # every '.json' in the name becomes '.h'
 except IndexError:
-   print("Usage: ./template.py <language_postfix>")
-   sys.exit(1)
+    print("Usage: ./json2h.py msg_hash_<lang>.json")  # no argument given: show usage and fail
+    sys.exit(1)
 
 if json_filename == 'msg_hash_us.json' or json_filename == 'msg_hash_lbl.json':
-   print("Skip")
-   sys.exit(0)
+    print("Skip")  # only exact, path-less matches of these two names are skipped
+    sys.exit(0)  # skipping is reported as success
+
+p = re.compile(  # matches MSG_HASH(KEY, "text"), allowing /* ... */ comments between any two tokens
+    r'MSG_HASH\s*(?:\/\*(?:.|[\r\n])*?\*\/\s*)*\(\s*(?:\/\*(?:.|[\r\n])*?\*\/\s*)*[a-zA-Z0-9_]+\s*(?:\/\*(?:.|[\r\n])*?\*\/\s*)*,\s*(?:\/\*(?:.|[\r\n])*?\*\/\s*)*\".*\"\s*(?:\/\*(?:.|[\r\n])*?\*\/\s*)*\)')
 
-p = re.compile(r'MSG_HASH\(\s*\/?\*?.*\*?\/?\s*[a-zA-Z0-9_]+\s*,\s*\".*\"\s*\)')
 
 def c89_cut(old_str):
-   new_str = ''
-   byte_count = 0
-   for c in old_str:
-      byte_count += len(c.encode('utf-8'))
-      if byte_count > 500:
-         return new_str + '[...]'
-      new_str += c
-   return new_str
+    new_str = ''  # characters accepted so far
+    byte_count = 0  # UTF-8 size of old_str up to and including the current character
+    for c in old_str:
+        byte_count += len(c.encode('utf-8'))  # count bytes, not characters
+        if byte_count > 500:  # limit presumably derives from C89 string-literal rules - TODO confirm
+            return new_str + '[...]'  # cut before the offending character and mark the truncation
+        new_str += c
+    return new_str  # short enough: returned unchanged
+
 
 def parse_message(message):
-   key_start = max(message.find('(') + 1, message.find('*/') + 2)
-   key_end = message.find(',')
-   key = message[key_start:key_end].strip()
-   value_start = message.find('"') + 1
-   value_end = message.rfind('"')
-   value = message[value_start:value_end]
-   return key, value
+    # remove all comments before the value (= the string)
+    a = message.find('/*')  # start of the first comment, -1 if there is none
+    b = message.find('*/')  # end marker of the first comment, -1 if there is none
+    c = message.find('"')  # first double quote in the message
+    new_msg = message
+    while (a >= 0 and b >= 0) and (a < c < b or b < c):  # comment contains or precedes the first quote
+        new_msg = new_msg[:a] + new_msg[b + 2:]  # drop that comment, markers included
+        c = new_msg.find('"', a)  # re-locate quote and next comment from the cut position
+        b = new_msg.find('*/', a)
+        a = new_msg.find('/*', a)
+    # the key is the stripped text between the first '(' and the first ','
+    word = new_msg[new_msg.find('(') + 1:new_msg.find(',')].strip()
+
+    # remove all comments after the value (= the string)
+    a = new_msg.rfind('/*')  # last comment start
+    b = new_msg.rfind('*/')  # last comment end marker
+    d = new_msg.rfind('"')  # last double quote in the message
+    while (a >= 0 and b >= 0) and (a < d < b or a > d):  # last comment contains or follows the last quote
+        new_msg = new_msg[:a]  # cut everything from that comment onwards
+        a = new_msg.rfind('/*')
+        b = new_msg.rfind('*/')
+        d = new_msg.rfind('"')
+    # the value is the text strictly between quote c and quote d
+    value = new_msg[c + 1:d]
+
+    return word, value
 
 
 def parse_messages(text):
-   result = p.findall(text)
-   seen = set()
-   msg_list = []
-   for msg in result:
-      key, val = parse_message(msg)
-      item = {'key': key, 'val': val, 'msg': msg}
-      msg_list.append(item)
-      if key not in seen:
-         seen.add(key)
-      else:
-         print("Duplicate key: " + key)
-   return msg_list
+    result = p.findall(text)  # every MSG_HASH(...) entry in text, as full match text
+    seen = set()  # keys encountered so far, used only to report duplicates
+    msg_list = []
+    for msg in result:
+        key, val = parse_message(msg)
+        item = {'key': key, 'val': val, 'msg': msg}  # 'msg' keeps the raw match for later replacement
+        msg_list.append(item)  # duplicates are still appended, in order of appearance
+        if key not in seen:
+            seen.add(key)
+        else:
+            print("Duplicate key: " + key)
+
+    return msg_list  # list of dicts, one per matched entry
 
 
 def update(messages, template, source_messages):
-   new_translation = template
-   template_messages = parse_messages(template)
-   for tp_msg in template_messages:
-      old_msg = tp_msg['msg']
-      if tp_msg['key'] in messages and messages[tp_msg['key']] != source_messages[tp_msg['key']]:
-         tp_msg_val = tp_msg['val']
-         tl_msg_val = messages[tp_msg['key']]
-         tl_msg_val = tl_msg_val.replace('"', '\\\"').replace('\n', '') # escape
-         if tp_msg['key'].find('_QT_') < 0:
-            tl_msg_val = c89_cut(tl_msg_val)
-         # Replace last match, incase the key contains the value string
-         new_msg = old_msg[::-1].replace(tp_msg_val[::-1], tl_msg_val[::-1], 1)[::-1]
-         new_translation = new_translation.replace(old_msg, new_msg)
-      # Remove English duplicates and non-translateable strings
-      else:
-         new_translation = new_translation.replace(old_msg + '\n', '')
-   return new_translation
+    translation = template  # start from the template text and rewrite it entry by entry
+    template_messages = parse_messages(template)
+    for tp_msg in template_messages:
+        old_msg = tp_msg['msg']  # raw MSG_HASH(...) text as it appears in the template
+        if tp_msg['key'] in messages and messages[tp_msg['key']] != source_messages[tp_msg['key']]:  # NOTE(review): KeyError if the key is absent from source_messages - confirm callers
+            tp_msg_val = tp_msg['val']  # string currently in the template entry
+            tl_msg_val = messages[tp_msg['key']]  # replacement string from messages
+            tl_msg_val = tl_msg_val.replace('"', '\\\"').replace('\n', '')  # escape quotes, drop newlines
+            if tp_msg['key'].find('_QT_') < 0:  # keys containing '_QT_' are exempt from the length cut
+                tl_msg_val = c89_cut(tl_msg_val)
+            # Replace last match, in case the key contains the value string
+            new_msg = old_msg[::-1].replace(tp_msg_val[::-1], tl_msg_val[::-1], 1)[::-1]
+            translation = translation.replace(old_msg, new_msg)  # affects every occurrence of old_msg
+        # Remove entries whose translation is missing or identical to the source string
+        else:
+            translation = translation.replace(old_msg + '\n', '')  # removed only where a newline follows directly
+    return translation
 
 
-with open('msg_hash_us.h', 'r') as template_file:
-   template = template_file.read()
-   with open('msg_hash_us.json', 'r+', encoding='utf-8') as source_json_file:
-      source_messages = json.load(source_json_file)
-      with open(json_filename, 'r+', encoding='utf-8') as json_file:
-         messages = json.load(json_file)
-         new_translation = update(messages, template, source_messages)
-         with open(h_filename, 'w', encoding='utf-8') as h_file:
-            h_file.seek(0)
-            h_file.write(new_translation)
-            h_file.truncate()
+with open('msg_hash_us.h', 'r', encoding='utf-8') as template_file:  # template header, read from the working directory
+    template = template_file.read()
+    with open('msg_hash_us.json', 'r', encoding='utf-8') as source_json_file:  # read-only: never modified
+        source_messages = json.load(source_json_file)
+        with open(json_filename, 'r', encoding='utf-8') as json_file:  # read-only: never modified
+            messages = json.load(json_file)
+            new_translation = update(messages, template, source_messages)
+            with open(h_filename, 'w', encoding='utf-8') as h_file:  # 'w' creates the file or empties an existing one
+                h_file.seek(0)  # no-op: a file opened with 'w' already starts at offset 0
+                h_file.write(new_translation)
+                h_file.truncate()  # no-op here: the position is already at the end of the data