Improve text encoding/decoding

Eliminated automatically generated .inc files for fixed-length text arrays.
Improved Python code for extracting, decoding, and encoding text.
Finished implementing the DTE optimization algorithm, only to find that it's no better than the vanilla algorithm!
This commit is contained in:
everything8215 2024-08-12 15:58:20 -04:00
parent 56b32333a7
commit 0ece2bcbef
86 changed files with 732 additions and 610 deletions

View File

@ -6,6 +6,8 @@ ATTACK_MSG_INC = 1
.scope AttackMsg
Start := bank_start AttackMsg
; ##############################################################################
; ### AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY ###
; ##############################################################################

View File

@ -6,6 +6,8 @@ ATTACK_MSG_INC = 1
.scope AttackMsg
Start := bank_start AttackMsg
; ##############################################################################
; ### AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY ###
; ##############################################################################

View File

@ -6,12 +6,9 @@ ATTACK_NAME_INC = 1
.scope AttackName
; ##############################################################################
; ### AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY ###
; ##############################################################################
; ##############################################################################
ARRAY_LENGTH = 175
ITEM_SIZE = 10
SIZE = ARRAY_LENGTH * ITEM_SIZE
.endscope

View File

@ -6,12 +6,9 @@ ATTACK_NAME_INC = 1
.scope AttackName
; ##############################################################################
; ### AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY ###
; ##############################################################################
; ##############################################################################
ARRAY_LENGTH = 175
ITEM_SIZE = 8
SIZE = ARRAY_LENGTH * ITEM_SIZE
.endscope

View File

@ -6,12 +6,9 @@ BATTLE_CMD_NAME_INC = 1
.scope BattleCmdName
; ##############################################################################
; ### AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY ###
; ##############################################################################
; ##############################################################################
ARRAY_LENGTH = 32
ITEM_SIZE = 7
SIZE = ARRAY_LENGTH * ITEM_SIZE
.endscope

View File

@ -6,12 +6,9 @@ BATTLE_CMD_NAME_INC = 1
.scope BattleCmdName
; ##############################################################################
; ### AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY ###
; ##############################################################################
; ##############################################################################
ARRAY_LENGTH = 32
ITEM_SIZE = 6
SIZE = ARRAY_LENGTH * ITEM_SIZE
.endscope

View File

@ -6,6 +6,8 @@ BATTLE_DLG_INC = 1
.scope BattleDlg
Start := bank_start BattleDlg
; ##############################################################################
; ### AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY ###
; ##############################################################################

View File

@ -6,6 +6,8 @@ BATTLE_DLG_INC = 1
.scope BattleDlg
Start := bank_start BattleDlg
; ##############################################################################
; ### AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY ###
; ##############################################################################

View File

@ -6,12 +6,9 @@ BUSHIDO_NAME_INC = 1
.scope BushidoName
; ##############################################################################
; ### AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY ###
; ##############################################################################
; ##############################################################################
ARRAY_LENGTH = 8
ITEM_SIZE = 12
SIZE = ARRAY_LENGTH * ITEM_SIZE
.endscope

View File

@ -6,12 +6,9 @@ BUSHIDO_NAME_INC = 1
.scope BushidoName
; ##############################################################################
; ### AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY ###
; ##############################################################################
; ##############################################################################
ARRAY_LENGTH = 8
ITEM_SIZE = 6
SIZE = ARRAY_LENGTH * ITEM_SIZE
.endscope

View File

@ -6,12 +6,9 @@ CHAR_NAME_INC = 1
.scope CharName
; ##############################################################################
; ### AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY ###
; ##############################################################################
; ##############################################################################
ARRAY_LENGTH = 64
ITEM_SIZE = 6
SIZE = ARRAY_LENGTH * ITEM_SIZE
.endscope

View File

@ -6,12 +6,9 @@ CHAR_NAME_INC = 1
.scope CharName
; ##############################################################################
; ### AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY ###
; ##############################################################################
; ##############################################################################
ARRAY_LENGTH = 64
ITEM_SIZE = 6
SIZE = ARRAY_LENGTH * ITEM_SIZE
.endscope

View File

@ -6,12 +6,9 @@ CHAR_TITLE_INC = 1
.scope CharTitle
; ##############################################################################
; ### AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY ###
; ##############################################################################
; ##############################################################################
ARRAY_LENGTH = 64
ITEM_SIZE = 7
SIZE = ARRAY_LENGTH * ITEM_SIZE
.endscope

View File

@ -6,12 +6,9 @@ DANCE_NAME_INC = 1
.scope DanceName
; ##############################################################################
; ### AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY ###
; ##############################################################################
; ##############################################################################
ARRAY_LENGTH = 8
ITEM_SIZE = 12
SIZE = ARRAY_LENGTH * ITEM_SIZE
.endscope

View File

@ -6,12 +6,9 @@ DANCE_NAME_INC = 1
.scope DanceName
; ##############################################################################
; ### AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY ###
; ##############################################################################
; ##############################################################################
ARRAY_LENGTH = 8
ITEM_SIZE = 8
SIZE = ARRAY_LENGTH * ITEM_SIZE
.endscope

View File

@ -6,7 +6,7 @@ DLG1_INC = 1
.scope Dlg1
Start = Dlg1
Start := bank_start Dlg1
; ##############################################################################
; ### AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY ###

View File

@ -6,6 +6,8 @@ DLG1_INC = 1
.scope Dlg1
Start := bank_start Dlg1
; ##############################################################################
; ### AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY ###
; ##############################################################################

View File

@ -6,6 +6,8 @@ DLG2_INC = 1
.scope Dlg2
Start := bank_start Dlg2
; ##############################################################################
; ### AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY ###
; ##############################################################################

View File

@ -6,6 +6,8 @@ DLG2_INC = 1
.scope Dlg2
Start := bank_start Dlg2
; ##############################################################################
; ### AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY ###
; ##############################################################################

View File

@ -6,12 +6,9 @@ GENJU_ATTACK_NAME_INC = 1
.scope GenjuAttackName
; ##############################################################################
; ### AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY ###
; ##############################################################################
; ##############################################################################
ARRAY_LENGTH = 27
ITEM_SIZE = 10
SIZE = ARRAY_LENGTH * ITEM_SIZE
.endscope

View File

@ -6,12 +6,9 @@ GENJU_ATTACK_NAME_INC = 1
.scope GenjuAttackName
; ##############################################################################
; ### AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY ###
; ##############################################################################
; ##############################################################################
ARRAY_LENGTH = 27
ITEM_SIZE = 9
SIZE = ARRAY_LENGTH * ITEM_SIZE
.endscope

View File

@ -6,12 +6,9 @@ GENJU_BONUS_NAME_INC = 1
.scope GenjuBonusName
; ##############################################################################
; ### AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY ###
; ##############################################################################
; ##############################################################################
ARRAY_LENGTH = 17
ITEM_SIZE = 9
SIZE = ARRAY_LENGTH * ITEM_SIZE
.endscope

View File

@ -6,12 +6,9 @@ GENJU_BONUS_NAME_INC = 1
.scope GenjuBonusName
; ##############################################################################
; ### AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY ###
; ##############################################################################
; ##############################################################################
ARRAY_LENGTH = 17
ITEM_SIZE = 8
SIZE = ARRAY_LENGTH * ITEM_SIZE
.endscope

View File

@ -6,12 +6,9 @@ GENJU_NAME_INC = 1
.scope GenjuName
; ##############################################################################
; ### AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY ###
; ##############################################################################
; ##############################################################################
ARRAY_LENGTH = 27
ITEM_SIZE = 8
SIZE = ARRAY_LENGTH * ITEM_SIZE
.endscope

View File

@ -6,12 +6,9 @@ GENJU_NAME_INC = 1
.scope GenjuName
; ##############################################################################
; ### AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY ###
; ##############################################################################
; ##############################################################################
ARRAY_LENGTH = 27
ITEM_SIZE = 8
SIZE = ARRAY_LENGTH * ITEM_SIZE
.endscope

View File

@ -6,12 +6,9 @@ ITEM_NAME_INC = 1
.scope ItemName
; ##############################################################################
; ### AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY ###
; ##############################################################################
; ##############################################################################
ARRAY_LENGTH = 256
ITEM_SIZE = 13
SIZE = ARRAY_LENGTH * ITEM_SIZE
.endscope

View File

@ -6,12 +6,9 @@ ITEM_NAME_INC = 1
.scope ItemName
; ##############################################################################
; ### AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY ###
; ##############################################################################
; ##############################################################################
ARRAY_LENGTH = 256
ITEM_SIZE = 9
SIZE = ARRAY_LENGTH * ITEM_SIZE
.endscope

View File

@ -6,12 +6,9 @@ ITEM_TYPE_NAME_INC = 1
.scope ItemTypeName
; ##############################################################################
; ### AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY ###
; ##############################################################################
; ##############################################################################
ARRAY_LENGTH = 16
ITEM_SIZE = 7
SIZE = ARRAY_LENGTH * ITEM_SIZE
.endscope

View File

@ -6,12 +6,9 @@ MAGIC_NAME_INC = 1
.scope MagicName
; ##############################################################################
; ### AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY ###
; ##############################################################################
; ##############################################################################
ARRAY_LENGTH = 54
ITEM_SIZE = 7
SIZE = ARRAY_LENGTH * ITEM_SIZE
.endscope

View File

@ -6,12 +6,9 @@ MAGIC_NAME_INC = 1
.scope MagicName
; ##############################################################################
; ### AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY ###
; ##############################################################################
; ##############################################################################
ARRAY_LENGTH = 54
ITEM_SIZE = 5
SIZE = ARRAY_LENGTH * ITEM_SIZE
.endscope

View File

@ -6,6 +6,8 @@ MONSTER_DLG_INC = 1
.scope MonsterDlg
Start := bank_start MonsterDlg
; ##############################################################################
; ### AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY ###
; ##############################################################################

View File

@ -6,6 +6,8 @@ MONSTER_DLG_INC = 1
.scope MonsterDlg
Start := bank_start MonsterDlg
; ##############################################################################
; ### AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY ###
; ##############################################################################

View File

@ -6,12 +6,9 @@ MONSTER_NAME_INC = 1
.scope MonsterName
; ##############################################################################
; ### AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY ###
; ##############################################################################
; ##############################################################################
ARRAY_LENGTH = 384
ITEM_SIZE = 10
SIZE = ARRAY_LENGTH * ITEM_SIZE
.endscope

View File

@ -6,12 +6,9 @@ MONSTER_NAME_INC = 1
.scope MonsterName
; ##############################################################################
; ### AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY ###
; ##############################################################################
; ##############################################################################
ARRAY_LENGTH = 384
ITEM_SIZE = 8
SIZE = ARRAY_LENGTH * ITEM_SIZE
.endscope

View File

@ -6,12 +6,9 @@ MONSTER_SPECIAL_NAME_INC = 1
.scope MonsterSpecialName
; ##############################################################################
; ### AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY ###
; ##############################################################################
; ##############################################################################
ARRAY_LENGTH = 384
ITEM_SIZE = 10
SIZE = ARRAY_LENGTH * ITEM_SIZE
.endscope

View File

@ -6,12 +6,9 @@ MONSTER_SPECIAL_NAME_INC = 1
.scope MonsterSpecialName
; ##############################################################################
; ### AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY ###
; ##############################################################################
; ##############################################################################
ARRAY_LENGTH = 384
ITEM_SIZE = 8
SIZE = ARRAY_LENGTH * ITEM_SIZE
.endscope

View File

@ -6,12 +6,9 @@ RARE_ITEM_NAME_INC = 1
.scope RareItemName
; ##############################################################################
; ### AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY ###
; ##############################################################################
; ##############################################################################
ARRAY_LENGTH = 20
ITEM_SIZE = 13
SIZE = ARRAY_LENGTH * ITEM_SIZE
.endscope

View File

@ -6,12 +6,9 @@ RARE_ITEM_NAME_INC = 1
.scope RareItemName
; ##############################################################################
; ### AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY ###
; ##############################################################################
; ##############################################################################
ARRAY_LENGTH = 30
ITEM_SIZE = 8
SIZE = ARRAY_LENGTH * ITEM_SIZE
.endscope

View File

@ -6,12 +6,9 @@ STATUS_NAME_INC = 1
.scope StatusName
; ##############################################################################
; ### AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY ###
; ##############################################################################
; ##############################################################################
ARRAY_LENGTH = 32
ITEM_SIZE = 10
SIZE = ARRAY_LENGTH * ITEM_SIZE
.endscope

View File

@ -6,12 +6,9 @@ STATUS_NAME_INC = 1
.scope StatusName
; ##############################################################################
; ### AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY ###
; ##############################################################################
; ##############################################################################
ARRAY_LENGTH = 32
ITEM_SIZE = 7
SIZE = ARRAY_LENGTH * ITEM_SIZE
.endscope

View File

@ -1,6 +1,5 @@
{
"asset_label": "AttackName",
"inc_path": "include/text/attack_name_en.inc",
"item_size": 10,
"char_tables": [
"fixed_length_en",

View File

@ -1,6 +1,5 @@
{
"asset_label": "AttackName",
"inc_path": "include/text/attack_name_jp.inc",
"item_size": 8,
"char_tables": [
"fixed_length_jp",

View File

@ -1,6 +1,5 @@
{
"asset_label": "BattleCmdName",
"inc_path": "include/text/battle_cmd_name_en.inc",
"item_size": 7,
"char_tables": [
"fixed_length_en",

View File

@ -1,6 +1,5 @@
{
"asset_label": "BattleCmdName",
"inc_path": "include/text/battle_cmd_name_jp.inc",
"item_size": 6,
"char_tables": [
"fixed_length_jp",

View File

@ -1,6 +1,5 @@
{
"asset_label": "BushidoName",
"inc_path": "include/text/bushido_name_en.inc",
"item_size": 12,
"char_tables": [
"text_en",

View File

@ -1,6 +1,5 @@
{
"asset_label": "BushidoName",
"inc_path": "include/text/bushido_name_jp.inc",
"item_size": 6,
"char_tables": [
"bushido_name"

View File

@ -1,6 +1,5 @@
{
"asset_label": "CharName",
"inc_path": "include/text/char_name_en.inc",
"item_size": 6,
"char_tables": [
"text_en",

View File

@ -1,6 +1,5 @@
{
"asset_label": "CharName",
"inc_path": "include/text/char_name_jp.inc",
"item_size": 6,
"char_tables": [
"fixed_length_jp",

View File

@ -1,6 +1,5 @@
{
"asset_label": "CharTitle",
"inc_path": "include/text/char_title_jp.inc",
"item_size": 7,
"char_tables": [
"fixed_length_jp",

View File

@ -1,6 +1,5 @@
{
"asset_label": "DanceName",
"inc_path": "include/text/dance_name_en.inc",
"item_size": 12,
"char_tables": [
"text_en",

View File

@ -1,6 +1,5 @@
{
"asset_label": "DanceName",
"inc_path": "include/text/dance_name_jp.inc",
"item_size": 8,
"char_tables": [
"fixed_length_jp",

View File

@ -0,0 +1,137 @@
{
"asset_label": "DTETbl",
"item_size": 2,
"char_tables": [
"dialog_en"
],
"text": [
"e ",
" t",
"he",
"th",
"t ",
"s ",
"er",
" a",
": ",
"re",
"ou",
"in",
"d ",
" w",
" h",
" s",
"an",
"o ",
"n ",
"r ",
"ha",
" o",
"to",
"on",
" i",
"at",
" m",
", ",
"ng",
"ve",
"st",
"y ",
" b",
"ar",
"or",
"me",
"hi",
"yo",
"en",
"ea",
"ll",
"is",
" y",
" f",
"it",
"ed",
"l ",
"es",
" I",
"nd",
"le",
" c",
"se",
"ne",
"te",
"a ",
"'s",
"as",
" l",
"al",
" p",
"I ",
"g ",
"ur",
"pe",
"ow",
"om",
" d",
"Th",
" g",
"et",
"f ",
"ro",
"be",
"u ",
"wa",
"us",
"ut",
"rs",
" r",
"ri",
"nt",
"li",
"we",
"of",
"el",
"ma",
"ho",
"ee",
"ta",
" n",
"de",
"h ",
"oo",
"gh",
"m ",
"ca",
"ra",
"so",
"no",
"ti",
"e!",
"ld",
"ir",
"ay",
"n'",
"av",
"lo",
"fo",
"! ",
" E",
" u",
"ce",
"co",
" W",
"ac",
"ke",
"e_",
"e'",
"ch",
"I'",
" T",
"un",
"ig",
"il",
" e",
"ai",
". "
]
}

View File

@ -1,6 +1,5 @@
{
"asset_label": "GenjuAttackName",
"inc_path": "include/text/genju_attack_name_en.inc",
"item_size": 10,
"char_tables": [
"big_symbols_en",

View File

@ -1,6 +1,5 @@
{
"asset_label": "GenjuAttackName",
"inc_path": "include/text/genju_attack_name_jp.inc",
"item_size": 9,
"char_tables": [
"fixed_length_jp",

View File

@ -1,6 +1,5 @@
{
"asset_label": "GenjuBonusName",
"inc_path": "include/text/genju_bonus_name_en.inc",
"item_size": 9,
"char_tables": [
"small_symbols_en",

View File

@ -1,6 +1,5 @@
{
"asset_label": "GenjuBonusName",
"inc_path": "include/text/genju_bonus_name_jp.inc",
"item_size": 8,
"char_tables": [
"small_symbols_jp",

View File

@ -1,6 +1,5 @@
{
"asset_label": "GenjuName",
"inc_path": "include/text/genju_name_en.inc",
"item_size": 8,
"char_tables": [
"text_en",

View File

@ -1,6 +1,5 @@
{
"asset_label": "GenjuName",
"inc_path": "include/text/genju_name_jp.inc",
"item_size": 8,
"char_tables": [
"small_symbols_jp",

View File

@ -1,6 +1,5 @@
{
"asset_label": "ItemName",
"inc_path": "include/text/item_name_en.inc",
"item_size": 13,
"char_tables": [
"text_en",

View File

@ -1,6 +1,5 @@
{
"asset_label": "ItemName",
"inc_path": "include/text/item_name_jp.inc",
"item_size": 9,
"char_tables": [
"small_symbols_jp",

View File

@ -1,6 +1,5 @@
{
"asset_label": "ItemTypeName",
"inc_path": "include/text/item_type_name_en.inc",
"item_size": 7,
"char_tables": [
"text_en",

View File

@ -1,6 +1,5 @@
{
"asset_label": "MagicName",
"inc_path": "include/text/magic_name_en.inc",
"item_size": 7,
"char_tables": [
"text_en",

View File

@ -1,6 +1,5 @@
{
"asset_label": "MagicName",
"inc_path": "include/text/magic_name_jp.inc",
"item_size": 5,
"char_tables": [
"small_symbols_jp",

View File

@ -1,6 +1,5 @@
{
"asset_label": "MonsterName",
"inc_path": "include/text/monster_name_en.inc",
"item_size": 10,
"char_tables": [
"fixed_length_en",

View File

@ -1,6 +1,5 @@
{
"asset_label": "MonsterName",
"inc_path": "include/text/monster_name_jp.inc",
"item_size": 8,
"char_tables": [
"small_symbols_jp",

View File

@ -1,6 +1,5 @@
{
"asset_label": "MonsterSpecialName",
"inc_path": "include/text/monster_special_name_en.inc",
"item_size": 10,
"char_tables": [
"fixed_length_en",

View File

@ -1,6 +1,5 @@
{
"asset_label": "MonsterSpecialName",
"inc_path": "include/text/monster_special_name_jp.inc",
"item_size": 8,
"char_tables": [
"fixed_length_jp",

View File

@ -1,6 +1,5 @@
{
"asset_label": "RareItemName",
"inc_path": "include/text/rare_item_name_en.inc",
"item_size": 13,
"char_tables": [
"text_en",

View File

@ -1,6 +1,5 @@
{
"asset_label": "RareItemName",
"inc_path": "include/text/rare_item_name_jp.inc",
"item_size": 8,
"char_tables": [
"small_symbols_jp",

View File

@ -1,6 +1,5 @@
{
"asset_label": "StatusName",
"inc_path": "include/text/status_name_en.inc",
"item_size": 10,
"char_tables": [
"text_en",

View File

@ -1,6 +1,5 @@
{
"asset_label": "StatusName",
"inc_path": "include/text/status_name_jp.inc",
"item_size": 7,
"char_tables": [
"small_symbols_jp",

View File

@ -9,6 +9,5 @@
"0xD4": "『",
"0xD5": "",
"0xD6": "",
"0xD7": "・",
"0xD8": "……"
"0xD7": "・"
}

View File

@ -166,7 +166,11 @@
"0xC4": "ァ",
"0xC5": "ー",
"0xC6": "ィ",
"0xC7": "…",
"0xC7": [
"_",
"_",
"…"
],
"0xC8": "ゥ",
"0xC9": "",
"0xCA": "ェ",

View File

@ -1,4 +1,9 @@
{
"0xD8": [
"__",
"__",
"……"
],
"0xD9": "帝国",
"0xDA": "って",
"0xDB": "った",

View File

@ -5,83 +5,6 @@ import os
import sys
import romtools as rt
ASM_INDENT = ' ' * 8
def encode_text(asset_def):
    """Encode every string of a text asset into a single byte buffer.

    The asset definition supplies:
      * ``char_tables`` - names of JSON files under ``tools/char_table``
        that are merged (later tables override earlier ones) and handed to
        ``rt.TextCodec``
      * ``text`` - the list of strings to encode
      * ``item_size`` (optional) - every encoded string is padded to this
        length using the codec's ``{pad}`` entry
      * ``is_sequential`` (optional, only its presence is checked) - strings
        are stored back to back and never share data

    When neither optional key is present, a string whose encoded form
    already occurs in the buffer reuses that offset instead of being
    appended again.

    Returns a ``(item_offsets, encoded_bytes)`` tuple: the offset of each
    string within the buffer, and the buffer itself (a ``bytearray``).

    Raises ``AssertionError`` if a fixed-length string is too long or if
    padding is needed but the char tables define no ``{pad}`` entry.
    """
    # build one combined character table for the codec
    merged_table = {}
    for table_name in asset_def['char_tables']:
        table_path = os.path.join('tools', 'char_table', table_name + '.json')
        with open(table_path, 'r', encoding='utf8') as table_file:
            merged_table.update(json.load(table_file))
    text_codec = rt.TextCodec(merged_table)

    # the storage layout is a property of the asset, not of each string
    is_fixed_length = 'item_size' in asset_def
    is_sequential = 'is_sequential' in asset_def

    encoded_bytes = bytearray()
    item_offsets = []
    for text_item in asset_def['text']:
        encoded_text = text_codec.encode_text(text_item)

        # -1 means "not shared": the string gets appended to the buffer
        shared_offset = -1
        if is_fixed_length:
            item_size = asset_def['item_size']
            assert len(encoded_text) <= item_size, \
                f'Text string \"{text_item}\" too long by ' \
                f'{len(encoded_text) - item_size} char(s)'
            missing = item_size - len(encoded_text)
            if missing != 0:
                assert '{pad}' in text_codec.encoding_table, \
                    'Padding not found in char table'
                pad_char = text_codec.encoding_table['{pad}']
                for _ in range(missing):
                    encoded_text.append(pad_char)
        elif not is_sequential:
            # shared items are allowed, look for an existing copy
            shared_offset = encoded_bytes.find(encoded_text)

        if shared_offset == -1:
            item_offsets.append(len(encoded_bytes))
            encoded_bytes += encoded_text
        else:
            item_offsets.append(shared_offset)

    return item_offsets, encoded_bytes
def update_text_inc(asset_def, item_offsets):
    """Regenerate the assembly definitions for a text asset's include file.

    Builds an ``ARRAY_LENGTH`` definition from the number of offsets, then
    either an ``ITEM_SIZE`` definition (when the asset definition has an
    ``item_size`` key) or one ``_<index> := <asset_label> + $<offset>``
    line per item. The resulting text is passed to ``rt.insert_asm``
    together with ``asset_def['inc_path']``.

    Args:
        asset_def: asset definition dict; reads ``asset_label``,
            ``inc_path`` and, optionally, ``item_size``.
        item_offsets: offset of each item relative to the asset label.
    """
    asset_label = asset_def['asset_label']

    # define the array length
    lines = [f'{ASM_INDENT}ARRAY_LENGTH = {len(item_offsets)}\n']

    if 'item_size' in asset_def:
        # fixed item size
        item_size = asset_def['item_size']
        lines.append(f'{ASM_INDENT}ITEM_SIZE = {item_size}\n')
    else:
        # define item offsets; everything is formatted in a single f-string
        # so that a '%' in the asset label can never be mistaken for a
        # printf-style directive
        lines.append('\n')
        lines.extend(
            f'{ASM_INDENT}_{index} := {asset_label} + ${offset:04x}\n'
            for index, offset in enumerate(item_offsets)
        )

    # update item offsets in the include file
    rt.insert_asm(asset_def['inc_path'], ''.join(lines))
if __name__ == '__main__':
@ -91,14 +14,11 @@ if __name__ == '__main__':
with open(asset_path, 'r', encoding='utf8') as json_file:
asset_def = json.load(json_file)
item_offsets, encoded_bytes = encode_text(asset_def)
# encode the text
encoded_bytes, _ = rt.encode_text(asset_def)
# write the encoded binary data to the data path
asset_root, _ = os.path.splitext(asset_path)
dat_path = asset_root + '.dat'
with open(dat_path, 'wb') as f:
f.write(encoded_bytes)
# if necessary, update the offsets in the include file
if 'inc_path' in asset_def:
update_text_inc(asset_def, item_offsets)

View File

@ -7,9 +7,246 @@ import romtools as rt
from ff6_lzss import *
from monster_stencil import apply_stencil
class AssetExtractor:
def __init__(self, rom_bytes, map_mode):
self.rom_bytes = rom_bytes
self.memory_map = rt.MemoryMap(map_mode)
def extract_object(self, asset_range, **kwargs):
# calculate the appropriate ROM range using the mapper
unmapped_range = rt.Range(asset_range)
mapped_range = self.memory_map.map_range(unmapped_range)
# extract the asset data
asset_bytes = self.rom_bytes[mapped_range.begin:mapped_range.end + 1]
# make a list of pointers for each item in the asset
pointer_list = []
if 'ptr_range' in kwargs:
# array with a pointer table
is_mapped = kwargs.get('is_mapped', False)
ptr_offset = kwargs.get('ptr_offset', 0)
if isinstance(ptr_offset, str):
ptr_offset = int(ptr_offset, 0)
if not is_mapped:
# map the pointer offset first, then add pointers
ptr_offset = self.memory_map.map_address(ptr_offset)
# extract the pointer table data
ptr_range = rt.Range(kwargs['ptr_range'])
ptr_range = self.memory_map.map_range(ptr_range)
ptr_data = self.rom_bytes[ptr_range.begin:ptr_range.end + 1]
ptr_size = kwargs.get('ptr_size', 2)
assert len(ptr_data) % ptr_size == 0, 'Pointer table length' \
+ ' is not divisible by pointer size'
array_length = len(ptr_data) // ptr_size
for i in range(array_length):
pointer = ptr_data[i * ptr_size]
if ptr_size > 1:
pointer |= ptr_data[i * ptr_size + 1] << 8
if ptr_size > 2:
pointer |= ptr_data[i * ptr_size + 2] << 16
if ptr_size > 3:
pointer |= ptr_data[i * ptr_size + 3] << 24
pointer += ptr_offset
if is_mapped:
# map pointer after adding pointer offset
pointer = self.memory_map.map_address(pointer)
pointer_list.append(pointer - mapped_range.begin)
elif 'item_offsets' in kwargs:
# items with specified offsets
item_offsets = kwargs['item_offsets']
array_length = len(item_offsets)
for begin in item_offsets:
if isinstance(begin, str):
begin = int(begin, 0)
begin = self.memory_map.map_address(begin)
pointer_list.append(begin - mapped_range.begin)
elif 'terminator' in kwargs:
# terminated items
terminator = kwargs['terminator']
if isinstance(terminator, str):
terminator = int(terminator, 0)
pointer_list.append(0)
for p in range(len(asset_bytes) - 1):
if asset_bytes[p] == terminator:
pointer_list.append(p + 1)
array_length = len(pointer_list)
elif 'item_size' in kwargs:
# fixed item size
item_size = kwargs['item_size']
if isinstance(item_size, str):
item_size = int(item_size, 0)
assert len(asset_bytes) % item_size == 0, \
'Fixed-length array size mismatch'
array_length = len(asset_bytes) // item_size
for i in range(array_length):
pointer_list.append(i * item_size)
else:
# single object
pointer_list.append(0)
array_length = 1
# remove duplicates and sort pointers
sorted_pointers = sorted(set(pointer_list))
# create a list of pointer ranges (these don't correspond with item
# ranges for terminated and sequential items)
pointer_ranges = {}
for p, pointer in enumerate(sorted_pointers):
begin = pointer
if p == len(sorted_pointers) - 1:
end = len(asset_bytes) - 1
else:
end = sorted_pointers[p + 1] - 1
pointer_ranges[begin] = rt.Range(begin, end)
# create ranges for each item
item_ranges = []
for i in range(array_length):
begin = pointer_list[i]
if 'terminator' in kwargs:
# item range goes until terminator is found
end = begin
terminator = kwargs['terminator']
if isinstance(terminator, str):
terminator = int(terminator, 0)
while end < len(asset_bytes):
if asset_bytes[end] == terminator:
break
end = end + 1
item_ranges.append(rt.Range(begin, end))
elif kwargs.get('is_sequential', False):
if i != array_length - 1:
# item range goes up to next sequential pointer
end = pointer_list[i + 1] - 1
else:
# last item goes up to end of asset range
end = len(asset_bytes) - 1
item_ranges.append(rt.Range(begin, end))
else:
# otherwise, item range is same as pointer range
item_ranges.append(pointer_ranges[begin])
return asset_bytes, item_ranges
def write_asset_file(self, asset_bytes, asset_path):
# create directories
os.makedirs(os.path.dirname(asset_path), exist_ok=True)
# decompress the data, if necessary
if asset_path.endswith('.lz'):
with open(asset_path[:-3], 'wb') as f:
f.write(decode_lzss(asset_bytes))
# save the raw data
with open(asset_path, 'wb') as f:
f.write(asset_bytes)
def extract_text(self, json_path, asset_range, **kwargs):
# read the json file
with open(json_path, 'r', encoding='utf8') as json_file:
asset_def = json.load(json_file)
if 'item_size' in asset_def:
kwargs['item_size'] = asset_def['item_size']
asset_root, _ = os.path.splitext(json_path)
# check if the data file already exists
dat_path = asset_root + '.dat'
if os.path.exists(dat_path):
return
# otherwise, we need to extract the text and create the data file
print(f'{asset_range} -> {json_path}')
# extract the text from the ROM
asset_bytes, item_ranges = self.extract_object(asset_range, **kwargs)
# write data file
self.write_asset_file(asset_bytes, dat_path)
# update include file
rt.update_array_inc(asset_bytes, item_ranges, **asset_def)
# create the text codec
text_codec = rt.TextCodec(asset_def)
# decode the text strings
text_list = []
for item_range in item_ranges:
item_bytes = asset_bytes[item_range.begin:item_range.end + 1]
text_list.append(text_codec.decode(item_bytes))
asset_def['text'] = text_list
# write text strings to the asset file
asset_json = json.dumps(asset_def, ensure_ascii=False, indent=2)
with open(json_path, 'w', encoding='utf8') as f:
f.write(asset_json)
def extract_array(self, file_path, asset_range, **kwargs):
# extract the array data from the ROM
asset_bytes, item_ranges = self.extract_object(asset_range, **kwargs)
if os.path.exists(file_path):
return
# write data file
print(f'{asset_range} -> {file_path}')
self.write_asset_file(asset_bytes, file_path)
# check if an include file exists
rt.update_array_inc(asset_bytes, item_ranges, **kwargs)
def extract_asset(self, file_path, asset_range, **kwargs):
    """Extract each item of an asset into its own file.

    ``file_path`` is a template whose ``%s`` is replaced by the item's name:
    either the matching entry of ``kwargs['file_list']`` or the item index
    as four hex digits. Items whose file already exists, whose range is
    empty, or whose range begins below zero are skipped.

    Args:
        file_path: Path template containing ``%s``.
        asset_range: ROM range of the asset, passed to ``extract_object``.
        **kwargs: Options forwarded to ``extract_object``; ``file_list``
            optionally names the items.
    """
    # extract the asset from the ROM
    asset_bytes, item_ranges = self.extract_object(asset_range, **kwargs)
    item_count = len(item_ranges)

    # work out one file name per item
    if 'file_list' in kwargs:
        file_list = kwargs['file_list']
        assert len(file_list) == len(item_ranges)
    else:
        file_list = ['%04x' % index for index in range(item_count)]

    path_list = [
        file_path.replace('%s', file_list[index])
        for index in range(item_count)
    ]

    announced = False
    for target_path, item_range in zip(path_list, item_ranges):
        # skip items that are already on disk or have nothing to extract
        if (os.path.exists(target_path)
                or item_range.is_empty()
                or item_range.begin < 0):
            continue

        # report the asset once, on the first item actually written
        if not announced:
            announced = True
            print(f'{asset_range} -> {file_path}')

        self.write_asset_file(
            asset_bytes[item_range.begin:item_range.end + 1], target_path
        )
def extract_rom(rom_bytes, language):
ae = rt.AssetExtractor(rom_bytes, 'hirom')
ae = AssetExtractor(rom_bytes, 'hirom')
# load rip info
rip_list_path = os.path.join('tools', f'rip_list_{language}.json')
@ -66,7 +303,6 @@ def extract_rom(rom_bytes, language):
if __name__ == '__main__':
memory_map = rt.MemoryMap('hirom')
# search the vanilla directory for valid ROM files
dir_list = os.listdir('vanilla')

View File

@ -2,18 +2,97 @@
import romtools as rt
import os
import re
import sys
import json
from encode_text import encode_text, update_text_inc
ESCAPE_REGEX = r'{(\w+)(?:\:(\w+))?}'


def escape_len(code):
    """Return the number of encoded bytes taken up by an escape code.

    ``code`` is matched against ``ESCAPE_REGEX`` (``{name}`` or
    ``{name:param}``). A ``b`` parameter gives 2, a ``w`` parameter gives 3;
    anything else (no match, no parameter, unknown parameter) gives 1.
    """
    param_match = re.match(ESCAPE_REGEX, code)
    if param_match is None:
        return 1

    # total length by parameter type; a missing parameter (None) or an
    # unrecognized one falls back to a bare one-byte code
    return {'b': 2, 'w': 3}.get(param_match.group(2), 1)
def optimize_dte(dlg_def):
    """Rebuild the DTE table from the most common character pairs.

    The dialogue in ``dlg_def`` is encoded without the ``dte`` char table,
    character pairs are counted, and the (up to) 128 most frequent pairs
    are written to ``tools/char_table/dte.json`` and to the ``text`` list
    of ``src/text/dte_tbl_en.json``.

    Note that ``'dte'`` is removed from ``dlg_def['char_tables']`` in place.

    Args:
        dlg_def: Dialogue asset definition (dict with ``char_tables`` and
            whatever ``rt.encode_text`` / ``rt.TextCodec`` require).
    """
    dte_count = 128  # number of DTE entries
    dte_base = 128   # code assigned to the first DTE entry

    # make sure there is a DTE table
    char_tables = dlg_def['char_tables']
    if 'dte' not in char_tables:
        print("Can't optimize DTE")
        return

    # create a text codec without DTE
    char_tables.remove('dte')
    dlg_def['char_tables'] = char_tables

    # encode all of the dialogue
    dlg_bytes, _ = rt.encode_text(dlg_def)

    # find all valid pairs of characters
    text_codec = rt.TextCodec(dlg_def)
    decoding_table = text_codec.decoding_table
    dte_pairs = {}
    i = 0
    while i < len(dlg_bytes) - 2:
        first_char = decoding_table[dlg_bytes[i]]
        if first_char[0] == '{':
            # escape codes can't be part of a pair; skip the whole code
            i += escape_len(first_char)
            continue

        second_char = decoding_table[dlg_bytes[i + 1]]
        if second_char[0] == '{':
            # skip the first character as well as the escape code
            i += escape_len(second_char) + 1
            continue

        pair = first_char + second_char
        i += 2
        dte_pairs[pair] = dte_pairs.get(pair, 0) + 1

    # choose the most common pairs (there may be fewer than dte_count)
    sorted_pairs = sorted(
        dte_pairs.items(), key=lambda item: item[1], reverse=True
    )
    best_pairs = sorted_pairs[:dte_count]

    dte_char_table = {}
    print('Most common char pairs:')
    for index, (pair, count) in enumerate(best_pairs):
        dte_char_table[rt.hex_string(index + dte_base)] = pair
        print(pair, count)

    # update the dte char table; the json is written with
    # ensure_ascii=False, so the encoding must be explicit
    dte_char_table_path = os.path.join('tools', 'char_table', 'dte.json')
    with open(dte_char_table_path, 'w', encoding='utf8') as dte_char_table_file:
        dte_char_table_file.write(
            json.dumps(dte_char_table, ensure_ascii=False, indent=2)
        )

    # update the dte text file
    dte_list = [pair for pair, _ in best_pairs]
    dte_json_path = os.path.join('src', 'text', 'dte_tbl_en.json')
    with open(dte_json_path, 'r', encoding='utf8') as dte_json_file:
        dte_json = json.load(dte_json_file)
    dte_json['text'] = dte_list
    with open(dte_json_path, 'w', encoding='utf8') as dte_json_file:
        dte_json_file.write(json.dumps(dte_json, ensure_ascii=False, indent=2))
def split_dlg(dlg1_def, dlg2_def):
# find the first dialog offset beyond the first bank
item_offsets, _ = encode_text(dlg1_def)
_, item_ranges = rt.encode_text(dlg1_def)
bank_inc = len(dlg1_def['text'])
for index, offset in enumerate(item_offsets):
if offset >= 0x010000:
for index, range in enumerate(item_ranges):
if range.begin >= 0x010000:
bank_inc = index
break
@ -48,6 +127,11 @@ if __name__ == '__main__':
if dlg_cmd == 'split':
split_dlg(dlg1_def, dlg2_def)
# save both dialogue json files
with open(dlg1_path, 'w', encoding='utf8') as dlg1_file, open(dlg2_path, 'w', encoding='utf8') as dlg2_file:
dlg1_file.write(json.dumps(dlg1_def, ensure_ascii=False, indent=2))
dlg2_file.write(json.dumps(dlg2_def, ensure_ascii=False, indent=2))
elif dlg_cmd == 'combine':
combine_dlg(dlg1_def, dlg2_def)
@ -55,10 +139,16 @@ if __name__ == '__main__':
os.utime(dlg1_dat_path)
os.utime(dlg2_dat_path)
# save both dialogue json files
with open(dlg1_path, 'w', encoding='utf8') as dlg1_file, open(dlg2_path, 'w', encoding='utf8') as dlg2_file:
dlg1_file.write(json.dumps(dlg1_def, ensure_ascii=False, indent=2))
dlg2_file.write(json.dumps(dlg2_def, ensure_ascii=False, indent=2))
elif dlg_cmd == 'dte':
optimize_dte(dlg1_def)
os.utime(dlg1_path)
os.utime(dlg2_path)
else:
raise ValueError('Invalid command:', dlg_cmd)
# save both dialogue json files
with open(dlg1_path, 'w', encoding='utf8') as dlg1_file, open(dlg2_path, 'w', encoding='utf8') as dlg2_file:
dlg1_file.write(json.dumps(dlg1_def, ensure_ascii=False, indent=2))
dlg2_file.write(json.dumps(dlg2_def, ensure_ascii=False, indent=2))

View File

@ -1,11 +0,0 @@
#!/usr/bin/env python3
import os
import sys
import romtools as rt
# def optimize_dte():
if __name__ == '__main__':
dlg_path = sys.argv[1]

View File

@ -1,7 +1,7 @@
from romtools.range import Range
from romtools.memory_map import MemoryMap
from romtools.asset_extractor import AssetExtractor
from romtools.text_codec import TextCodec
from romtools.text_codec import TextCodec, encode_text
from romtools.hex_string import hex_string
from romtools.bytes_to_asm import bytes_to_asm
from romtools.insert_asm import insert_asm
from romtools.update_array_inc import update_array_inc

View File

@ -1,285 +0,0 @@
import romtools as rt
import json
import os
from ff6_lzss import *
ASM_INDENT = ' ' * 8


class AssetExtractor:
    """Extracts assets (raw data, arrays and text) from a ROM image."""

    def __init__(self, rom_bytes, map_mode):
        """Store the ROM data and create the memory map for ``map_mode``."""
        self.rom_bytes = rom_bytes
        self.memory_map = rt.MemoryMap(map_mode)

    @staticmethod
    def _parse_int(value):
        """Convert a numeric string (any ``int(x, 0)`` base prefix) to int."""
        if isinstance(value, str):
            return int(value, 0)
        return value

    @staticmethod
    def _inc_header(asset_bytes, item_ranges):
        """Return the SIZE / ARRAY_LENGTH definitions for an include file."""
        return (
            ASM_INDENT + f'SIZE = {len(asset_bytes)}\n'
            + ASM_INDENT + f'ARRAY_LENGTH = {len(item_ranges)}\n'
        )

    @staticmethod
    def _inc_item_offsets(asset_label, item_ranges):
        """Return a blank line followed by one offset label per item."""
        return '\n' + ''.join(
            f'{ASM_INDENT}_{index} := {asset_label} + ${item_range.begin:04x}\n'
            for index, item_range in enumerate(item_ranges)
        )

    def extract_object(self, asset_range, **kwargs):
        """Extract an asset and work out the range of each of its items.

        The item layout is chosen by the first of these keyword arguments
        that is present: ``ptr_range`` (pointer table; also reads
        ``is_mapped``, ``ptr_offset``, ``ptr_size``), ``item_offsets``,
        ``terminator``, ``item_size``. With none of them the asset is a
        single item. ``terminator`` and ``is_sequential`` additionally
        control where each item ends.

        Returns:
            ``(asset_bytes, item_ranges)`` where the ranges are closed
            intervals relative to the start of ``asset_bytes``.
        """
        # calculate the appropriate ROM range using the mapper
        unmapped_range = rt.Range(asset_range)
        mapped_range = self.memory_map.map_range(unmapped_range)

        # extract the asset data
        asset_bytes = self.rom_bytes[mapped_range.begin:mapped_range.end + 1]

        # the terminator is needed both for pointers and for item ranges
        has_terminator = 'terminator' in kwargs
        terminator = None
        if has_terminator:
            terminator = self._parse_int(kwargs['terminator'])

        # make a list of pointers for each item in the asset
        pointer_list = []
        if 'ptr_range' in kwargs:
            # array with a pointer table
            is_mapped = kwargs.get('is_mapped', False)
            ptr_offset = self._parse_int(kwargs.get('ptr_offset', 0))
            if not is_mapped:
                # map the pointer offset first, then add pointers
                ptr_offset = self.memory_map.map_address(ptr_offset)

            # extract the pointer table data
            ptr_range = rt.Range(kwargs['ptr_range'])
            ptr_range = self.memory_map.map_range(ptr_range)
            ptr_data = self.rom_bytes[ptr_range.begin:ptr_range.end + 1]
            ptr_size = kwargs.get('ptr_size', 2)
            assert len(ptr_data) % ptr_size == 0, 'Pointer table length' \
                + ' is not divisible by pointer size'
            array_length = len(ptr_data) // ptr_size

            for i in range(array_length):
                # little-endian pointer, at most four bytes are read
                pointer = ptr_data[i * ptr_size]
                if ptr_size > 1:
                    pointer |= ptr_data[i * ptr_size + 1] << 8
                if ptr_size > 2:
                    pointer |= ptr_data[i * ptr_size + 2] << 16
                if ptr_size > 3:
                    pointer |= ptr_data[i * ptr_size + 3] << 24
                pointer += ptr_offset
                if is_mapped:
                    # map pointer after adding pointer offset
                    pointer = self.memory_map.map_address(pointer)
                pointer_list.append(pointer - mapped_range.begin)

        elif 'item_offsets' in kwargs:
            # items with specified offsets
            item_offsets = kwargs['item_offsets']
            array_length = len(item_offsets)
            for begin in item_offsets:
                begin = self._parse_int(begin)
                begin = self.memory_map.map_address(begin)
                pointer_list.append(begin - mapped_range.begin)

        elif has_terminator:
            # terminated items: a new item starts after each terminator,
            # except a terminator in the very last byte
            pointer_list.append(0)
            for p in range(len(asset_bytes) - 1):
                if asset_bytes[p] == terminator:
                    pointer_list.append(p + 1)
            array_length = len(pointer_list)

        elif 'item_size' in kwargs:
            # fixed item size
            item_size = self._parse_int(kwargs['item_size'])
            assert len(asset_bytes) % item_size == 0, \
                'Fixed-length array size mismatch'
            array_length = len(asset_bytes) // item_size
            for i in range(array_length):
                pointer_list.append(i * item_size)

        else:
            # single object
            pointer_list.append(0)
            array_length = 1

        # remove duplicates and sort pointers
        sorted_pointers = sorted(set(pointer_list))

        # create a list of pointer ranges (these don't correspond with item
        # ranges for terminated and sequential items)
        pointer_ranges = {}
        for p, pointer in enumerate(sorted_pointers):
            begin = pointer
            if p == len(sorted_pointers) - 1:
                end = len(asset_bytes) - 1
            else:
                end = sorted_pointers[p + 1] - 1
            pointer_ranges[begin] = rt.Range(begin, end)

        # create ranges for each item
        item_ranges = []
        for i in range(array_length):
            begin = pointer_list[i]
            if has_terminator:
                # item range goes until terminator is found
                end = begin
                while end < len(asset_bytes):
                    if asset_bytes[end] == terminator:
                        break
                    end = end + 1
                item_ranges.append(rt.Range(begin, end))

            elif kwargs.get('is_sequential', False):
                if i != array_length - 1:
                    # item range goes up to next sequential pointer
                    end = pointer_list[i + 1] - 1
                else:
                    # last item goes up to end of asset range
                    end = len(asset_bytes) - 1
                item_ranges.append(rt.Range(begin, end))

            else:
                # otherwise, item range is same as pointer range
                item_ranges.append(pointer_ranges[begin])

        return asset_bytes, item_ranges

    def write_asset_file(self, asset_bytes, asset_path):
        """Write an asset to disk, creating its parent directory if needed.

        The raw bytes always go to ``asset_path``; for a ``.lz`` path the
        data decompressed by ``decode_lzss`` is also written to the path
        without the ``.lz`` suffix.
        """
        # os.makedirs('') raises FileNotFoundError, so only create
        # directories when the path has a directory component
        asset_dir = os.path.dirname(asset_path)
        if asset_dir:
            os.makedirs(asset_dir, exist_ok=True)

        # decompress the data, if necessary
        if asset_path.endswith('.lz'):
            with open(asset_path[:-3], 'wb') as f:
                f.write(decode_lzss(asset_bytes))

        # save the raw data
        with open(asset_path, 'wb') as f:
            f.write(asset_bytes)

    def extract_text(self, json_path, asset_range, **kwargs):
        """Extract a text asset and store its decoded strings in the json.

        Does nothing when the ``.dat`` file next to ``json_path`` already
        exists. Otherwise writes the ``.dat`` file, updates the include
        file named by ``inc_path`` in the definition (if any) and saves
        the decoded strings under the ``text`` key.
        """
        # read the json file
        with open(json_path, 'r', encoding='utf8') as json_file:
            asset_def = json.load(json_file)

        if 'item_size' in asset_def:
            kwargs['item_size'] = asset_def['item_size']
        asset_label = asset_def['asset_label']

        # check if the data file already exists
        asset_root, _ = os.path.splitext(json_path)
        dat_path = asset_root + '.dat'
        if os.path.exists(dat_path):
            return

        # extract the text from the ROM
        asset_bytes, item_ranges = self.extract_object(asset_range, **kwargs)

        # write data file
        print(f'{asset_range} -> {json_path}')
        self.write_asset_file(asset_bytes, dat_path)

        # check if an include file exists
        if 'inc_path' in asset_def:
            inc_path = asset_def['inc_path']
            assert os.path.exists(inc_path), f'Missing include file: {inc_path}'

            inc_text = self._inc_header(asset_bytes, item_ranges)
            if 'item_size' in asset_def:
                # fixed item size
                inc_text += ASM_INDENT + 'ITEM_SIZE = '
                inc_text += str(asset_def['item_size']) + '\n'
            else:
                # define item offsets
                inc_text += self._inc_item_offsets(asset_label, item_ranges)

            # update item offsets in the include file
            rt.insert_asm(inc_path, inc_text)

        # create the text codec
        char_table = {}
        for char_table_name in asset_def['char_tables']:
            char_table_path = 'tools/char_table/' + char_table_name + '.json'
            with open(char_table_path, 'r', encoding='utf8') as char_table_file:
                char_table.update(json.load(char_table_file))
        text_codec = rt.TextCodec(char_table)

        # decode the text strings
        asset_def['text'] = [
            text_codec.decode_text(asset_bytes[r.begin:r.end + 1])
            for r in item_ranges
        ]

        # write text strings to the asset file
        asset_json = json.dumps(asset_def, ensure_ascii=False, indent=2)
        with open(json_path, 'w', encoding='utf8') as f:
            f.write(asset_json)

    def extract_array(self, file_path, inc_path, asset_range, asset_label, **kwargs):
        """Extract an array asset and update its include file, if present.

        Does nothing when ``file_path`` already exists.
        """
        # check first, so an already-extracted array costs no ROM parsing
        if os.path.exists(file_path):
            return

        # extract the array data from the ROM
        asset_bytes, item_ranges = self.extract_object(asset_range, **kwargs)

        # write data file
        print(f'{asset_range} -> {file_path}')
        self.write_asset_file(asset_bytes, file_path)

        # check if an include file exists
        if os.path.exists(inc_path):
            inc_text = self._inc_header(asset_bytes, item_ranges)
            inc_text += self._inc_item_offsets(asset_label, item_ranges)

            # update item offsets in the include file
            rt.insert_asm(inc_path, inc_text)

    def extract_asset(self, file_path, asset_range, **kwargs):
        """Extract each item of an asset into its own file.

        ``%s`` in ``file_path`` is replaced by the item's entry in
        ``kwargs['file_list']`` or, without a file list, by the item index
        as four hex digits. Existing files, empty ranges and ranges that
        begin below zero are skipped.
        """
        # extract the asset from the ROM
        asset_bytes, item_ranges = self.extract_object(asset_range, **kwargs)

        # generate a list of file names
        if 'file_list' in kwargs:
            file_list = kwargs['file_list']
            assert len(file_list) == len(item_ranges)
        else:
            file_list = [('%04x' % i) for i in range(len(item_ranges))]

        path_list = [
            file_path.replace('%s', file_list[i])
            for i in range(len(item_ranges))
        ]

        extracted_one = False
        for item_path, item_range in zip(path_list, item_ranges):
            if os.path.exists(item_path):
                continue
            if item_range.is_empty() or item_range.begin < 0:
                continue

            # report the asset once, on the first item actually written
            if not extracted_one:
                extracted_one = True
                print(f'{asset_range} -> {file_path}')

            gfx_bytes = asset_bytes[item_range.begin:item_range.end + 1]
            self.write_asset_file(gfx_bytes, item_path)

View File

@ -1,5 +1,10 @@
import romtools as rt
'''
Converts a block of binary data into a string that can be interpreted by
the ca65 compiler. Labels can be inserted and referenced by symbols to
generate relocatable code.
'''
def bytes_to_asm(bytes, labels=None, symbols=None, line_width=16):
asm_string = ''
@ -51,7 +56,7 @@ def bytes_to_asm(bytes, labels=None, symbols=None, line_width=16):
# determine the value size
if type == '.byte':
width = 1
if type == '.word' or type == '.addr':
elif type == '.word' or type == '.addr':
width = 2
elif type == '.faraddr':
width = 3

View File

@ -1,3 +1,9 @@
'''
Convert an integer into a string in hexadecimal format. The output string can
optionally be padded with zeros, and a prefix can optionally be added to the
front of the string.
'''
def hex_string(num, pad=None, prefix='0x'):
if pad is not None:
pad = int(pad)

View File

@ -1,5 +1,12 @@
import romtools as rt
'''
Helper class to convert ROM file addresses between the address space of the
ROM file and the memory mapper address space used by the console CPU. Because
memory mapper address spaces are often non-contiguous at bank boundaries,
ranges should be specified as closed intervals (end value is included). This
is in contrast to the behavior of Python functions like range, etc.
'''
class MemoryMap:

View File

@ -1,5 +1,8 @@
import romtools as rt
'''
Closed-interval range object suitable for defining regions in a ROM file.
'''
class Range:

View File

@ -1,13 +1,27 @@
import re
import json
import romtools as rt
'''
Helper object for encoding and decoding ROM text. Text characters can map to
one- or two-byte codes, and multiple text characters can map to the same code
value (with the first value listed in the character map being the default).
Text strings can include escape codes enclosed in braces "{}". Escape codes
can optionally be followed by a one- or two-byte parameter.
'''
ESCAPE_REGEX = r'{(\w+)(?:\:(\w+))?}'
class TextCodec:
def __init__(self, char_table):
def __init__(self, asset_def):
# create the character table
char_table = {}
for char_table_name in asset_def['char_tables']:
char_table_path = 'tools/char_table/' + char_table_name + '.json'
with open(char_table_path, 'r', encoding='utf8') as char_table_file:
char_table.update(json.load(char_table_file))
# merge multiple character tables into a single list
if isinstance(char_table, list):
@ -49,13 +63,12 @@ class TextCodec:
self.encoding_table[value] = code
assert isinstance(primary_value, str)
self.decoding_table[code] = primary_value
self.encoding_keys = self.encoding_table.keys()
self.terminator_code = self.encoding_table.get('{0}')
def decode_text(self, text_bytes):
def decode(self, text_bytes):
text = ''
i = 0
@ -118,7 +131,7 @@ class TextCodec:
return text
def encode_text(self, text_str):
def encode(self, text_str):
i = 0
key_list = self.encoding_table.keys()
text_codes = []
@ -233,3 +246,58 @@ class TextCodec:
i += int(escape_match.group(1))
return min(i, len(text_bytes))
def encode_text(asset_def):
    """Encode every string of a text asset into one byte buffer.

    Three layouts are supported, chosen by keys of ``asset_def``:
    ``item_size`` (fixed-length strings padded with ``{pad}``),
    ``is_sequential`` (variable length, every item stored), or neither
    (variable length; an item whose encoding already occurs in the buffer
    reuses that location). The include file is updated through
    ``rt.update_array_inc`` before returning.

    Returns:
        ``(encoded_bytes, item_ranges)``: the buffer and one ``rt.Range``
        per string.
    """
    codec = TextCodec(asset_def)
    is_fixed = 'item_size' in asset_def
    is_sequential = 'is_sequential' in asset_def

    encoded_bytes = bytearray()
    item_ranges = []
    for text_item in asset_def['text']:
        encoded_text = codec.encode(text_item)
        shared_offset = -1

        if is_fixed:
            # fixed length strings
            item_size = asset_def['item_size']

            # check if text is too long
            assert len(encoded_text) <= item_size, \
                f'Text string \"{text_item}\" too long by ' \
                f'{len(encoded_text) - item_size} char(s)'

            # pad the text up to the item size
            if len(encoded_text) != item_size:
                assert '{pad}' in codec.encoding_table, \
                    'Padding not found in char table'
                pad_char = codec.encoding_table['{pad}']
                encoded_text.extend(
                    [pad_char] * (item_size - len(encoded_text))
                )
        else:
            item_size = len(encoded_text)
            if not is_sequential:
                # allow shared items: reuse an identical byte sequence
                shared_offset = encoded_bytes.find(encoded_text)

        if shared_offset == -1:
            # store the item at the end of the buffer
            item_offset = len(encoded_bytes)
            encoded_bytes += encoded_text
        else:
            item_offset = shared_offset

        # NOTE(review): the range end here is offset + size (exclusive),
        # while rt.Range is described as a closed interval - confirm
        item_ranges.append(rt.Range(item_offset, item_offset + item_size))

    # update the include file
    rt.update_array_inc(encoded_bytes, item_ranges, **asset_def)

    return encoded_bytes, item_ranges

View File

@ -0,0 +1,40 @@
import os
import romtools as rt
'''
Write offsets for the items in an array into an include file.
'''
ASM_INDENT = ' ' * 8


def update_array_inc(asset_bytes, item_ranges, **kwargs):
    """Write the size definitions of an array asset into its include file.

    Does nothing unless ``inc_path`` is among the keyword arguments. With
    ``item_size`` the generated text defines ARRAY_LENGTH, ITEM_SIZE and
    SIZE; without it, ARRAY_LENGTH, SIZE and one ``_<index>`` offset label
    per item (relative to ``asset_label``). The text is handed to
    ``rt.insert_asm``.

    Args:
        asset_bytes: Encoded asset data; only its length is used.
        item_ranges: One range per item; only ``begin`` is used.
        **kwargs: Asset definition; reads ``inc_path``, ``asset_label``
            and ``item_size``.

    Raises:
        AssertionError: If the include file does not exist or
            ``asset_label`` is missing.
    """
    if 'inc_path' not in kwargs:
        return

    inc_path = kwargs['inc_path']
    assert os.path.exists(inc_path), f'Missing include file: {inc_path}'

    # get the asset label
    assert 'asset_label' in kwargs, 'Missing asset_label'
    asset_label = kwargs['asset_label']

    # define the number of items in the array
    inc_lines = [f'{ASM_INDENT}ARRAY_LENGTH = {len(item_ranges)}\n']

    if 'item_size' in kwargs:
        # fixed item size
        inc_lines.append(ASM_INDENT + 'ITEM_SIZE = ' + str(kwargs['item_size']) + '\n')
        inc_lines.append(ASM_INDENT + 'SIZE = ARRAY_LENGTH * ITEM_SIZE\n')
    else:
        # variable item size
        inc_lines.append(f'{ASM_INDENT}SIZE = {len(asset_bytes)}\n\n')

        # define item offsets; formatted in one step so a '%' in the label
        # cannot be mistaken for a format specifier
        inc_lines.extend(
            f'{ASM_INDENT}_{index} := {asset_label} + ${item_range.begin:04x}\n'
            for index, item_range in enumerate(item_ranges)
        )

    # update item offsets in the include file
    rt.insert_asm(inc_path, ''.join(inc_lines))