RetroArch/intl/json2h.py
Michael Burgardt 648b0ab7b7
(Bug fix) Fix the weird 'empty translations' & correct backslash escaping in translation strings (#14609)
* (Bug fix) Fix the weird 'empty translations' & correct backslash escaping in translation strings

* Update crowdin workflows
GitHub is deprecating Node.js 12, so some actions need to be updated to ensure functionality into the future.
2022-11-11 07:56:17 +01:00

119 lines
3.9 KiB
Python
Executable File

#!/usr/bin/env python3
# Convert *.json to *.h
# Usage: ./json2h.py msg_hash_fr.json
import re
import sys
import json
try:
json_filename = sys.argv[1]
h_filename = json_filename.replace('.json', '.h')
except IndexError:
print("Usage: ./template.py <language_postfix>")
sys.exit(1)
if json_filename == 'msg_hash_us.json' or json_filename == 'msg_hash_lbl.json':
print("Skip")
sys.exit(0)
p = re.compile(
r'MSG_HASH\s*(?:\/\*(?:.|[\r\n])*?\*\/\s*)*\(\s*(?:\/\*(?:.|[\r\n])*?\*\/\s*)*[a-zA-Z0-9_]+\s*(?:\/\*(?:.|[\r\n])*?\*\/\s*)*,\s*(?:\/\*(?:.|[\r\n])*?\*\/\s*)*\".*\"\s*(?:\/\*(?:.|[\r\n])*?\*\/\s*)*\)')
def c89_cut(old_str):
if old_str.endswith('[...]'):
return old_str
new_str = ''
byte_count = 0
for c in old_str:
byte_count += len(c.encode('utf-8'))
if byte_count > 500:
return new_str + '[...]'
new_str += c
return new_str
def parse_message(message):
# remove all comments before the value (= the string)
a = message.find('/*')
b = message.find('*/')
c = message.find('"')
new_msg = message
while (a >= 0 and b >= 0) and (a < c < b or b < c):
new_msg = new_msg[:a] + new_msg[b + 2:]
c = new_msg.find('"', a)
b = new_msg.find('*/', a)
a = new_msg.find('/*', a)
# get key word
word = new_msg[new_msg.find('(') + 1:new_msg.find(',')].strip()
# remove all comments after the value (= the string)
a = new_msg.rfind('/*')
b = new_msg.rfind('*/')
d = new_msg.rfind('"')
while (a >= 0 and b >= 0) and (a < d < b or a > d):
new_msg = new_msg[:a]
a = new_msg.rfind('/*')
b = new_msg.rfind('*/')
d = new_msg.rfind('"')
# get value
value = new_msg[c + 1:d]
return word, value
def parse_messages(text):
result = p.findall(text)
seen = set()
msg_list = []
for msg in result:
key, val = parse_message(msg)
item = {'key': key, 'val': val, 'msg': msg}
msg_list.append(item)
if key not in seen:
seen.add(key)
else:
print("Duplicate key: " + key)
return msg_list
def update(messages, template, source_messages):
translation = template
template_messages = parse_messages(template)
for tp_msg in template_messages:
old_msg = tp_msg['msg']
if tp_msg['key'] in messages and messages[tp_msg['key']] != source_messages[tp_msg['key']]:
tp_msg_val = tp_msg['val']
tl_msg_val = messages[tp_msg['key']]
# escape all \
tl_msg_val = tl_msg_val.replace('\\', r'\\')
# remove "double-dipping" on escape sequences
tl_msg_val = re.sub(r'\\\\(?=[nrt])', r'\\', tl_msg_val)
# escape other symbols
tl_msg_val = tl_msg_val.replace('"', '\\\"').replace('\n', '')
if tp_msg['key'].find('_QT_') < 0:
tl_msg_val = c89_cut(tl_msg_val)
# Replace last match, in case the key contains the value string
new_msg = old_msg[::-1].replace(tp_msg_val[::-1], tl_msg_val[::-1], 1)[::-1]
translation = translation.replace(old_msg, new_msg)
# Remove English duplicates and non-translatable strings
else:
translation = translation.replace(old_msg + '\n', '')
return translation
with open('msg_hash_us.h', 'r', encoding='utf-8') as template_file:
template = template_file.read()
with open('msg_hash_us.json', 'r+', encoding='utf-8') as source_json_file:
source_messages = json.load(source_json_file)
with open(json_filename, 'r+', encoding='utf-8') as json_file:
messages = json.load(json_file)
new_translation = update(messages, template, source_messages)
with open(h_filename, 'w', encoding='utf-8') as h_file:
h_file.seek(0)
h_file.write(new_translation)
h_file.truncate()