Improve text encoding/decoding

Eliminated the automatically generated sections of the .inc files for fixed-length text arrays. Improved the Python code for extracting, decoding, and encoding text. Finished implementing the DTE optimization algorithm, only to find that it's no better than the vanilla algorithm!
2024-10-07 02:23:31 +00:00 · 2024-08-12 15:58:20 -04:00 · 2024-08-12 15:58:20 -04:00 · 0ece2bcbef
commit 0ece2bcbef
parent 56b32333a7
86 changed files with 732 additions and 610 deletions
--- a/include/text/attack_msg_en.inc
+++ b/include/text/attack_msg_en.inc
@ -6,6 +6,8 @@ ATTACK_MSG_INC = 1

 .scope AttackMsg

+        Start := bank_start AttackMsg
+
 ; ##############################################################################
 ; ###          AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY          ###
 ; ##############################################################################
--- a/include/text/attack_msg_jp.inc
+++ b/include/text/attack_msg_jp.inc
@ -6,6 +6,8 @@ ATTACK_MSG_INC = 1

 .scope AttackMsg

+        Start := bank_start AttackMsg
+
 ; ##############################################################################
 ; ###          AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY          ###
 ; ##############################################################################
--- a/include/text/attack_name_en.inc
+++ b/include/text/attack_name_en.inc
@ -6,12 +6,9 @@ ATTACK_NAME_INC = 1

 .scope AttackName

-; ##############################################################################
-; ###          AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY          ###
-; ##############################################################################
-
-
-; ##############################################################################
+        ARRAY_LENGTH = 175
+        ITEM_SIZE = 10
+        SIZE = ARRAY_LENGTH * ITEM_SIZE

 .endscope

--- a/include/text/attack_name_jp.inc
+++ b/include/text/attack_name_jp.inc
@ -6,12 +6,9 @@ ATTACK_NAME_INC = 1

 .scope AttackName

-; ##############################################################################
-; ###          AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY          ###
-; ##############################################################################
-
-
-; ##############################################################################
+        ARRAY_LENGTH = 175
+        ITEM_SIZE = 8
+        SIZE = ARRAY_LENGTH * ITEM_SIZE

 .endscope

--- a/include/text/battle_cmd_name_en.inc
+++ b/include/text/battle_cmd_name_en.inc
@ -6,12 +6,9 @@ BATTLE_CMD_NAME_INC = 1

 .scope BattleCmdName

-; ##############################################################################
-; ###          AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY          ###
-; ##############################################################################
-
-
-; ##############################################################################
+        ARRAY_LENGTH = 32
+        ITEM_SIZE = 7
+        SIZE = ARRAY_LENGTH * ITEM_SIZE

 .endscope

--- a/include/text/battle_cmd_name_jp.inc
+++ b/include/text/battle_cmd_name_jp.inc
@ -6,12 +6,9 @@ BATTLE_CMD_NAME_INC = 1

 .scope BattleCmdName

-; ##############################################################################
-; ###          AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY          ###
-; ##############################################################################
-
-
-; ##############################################################################
+        ARRAY_LENGTH = 32
+        ITEM_SIZE = 6
+        SIZE = ARRAY_LENGTH * ITEM_SIZE

 .endscope

--- a/include/text/battle_dlg_en.inc
+++ b/include/text/battle_dlg_en.inc
@ -6,6 +6,8 @@ BATTLE_DLG_INC = 1

 .scope BattleDlg

+        Start := bank_start BattleDlg
+
 ; ##############################################################################
 ; ###          AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY          ###
 ; ##############################################################################
--- a/include/text/battle_dlg_jp.inc
+++ b/include/text/battle_dlg_jp.inc
@ -6,6 +6,8 @@ BATTLE_DLG_INC = 1

 .scope BattleDlg

+        Start := bank_start BattleDlg
+
 ; ##############################################################################
 ; ###          AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY          ###
 ; ##############################################################################
--- a/include/text/bushido_name_en.inc
+++ b/include/text/bushido_name_en.inc
@ -6,12 +6,9 @@ BUSHIDO_NAME_INC = 1

 .scope BushidoName

-; ##############################################################################
-; ###          AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY          ###
-; ##############################################################################
-
-
-; ##############################################################################
+        ARRAY_LENGTH = 8
+        ITEM_SIZE = 12
+        SIZE = ARRAY_LENGTH * ITEM_SIZE

 .endscope

--- a/include/text/bushido_name_jp.inc
+++ b/include/text/bushido_name_jp.inc
@ -6,12 +6,9 @@ BUSHIDO_NAME_INC = 1

 .scope BushidoName

-; ##############################################################################
-; ###          AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY          ###
-; ##############################################################################
-
-
-; ##############################################################################
+        ARRAY_LENGTH = 8
+        ITEM_SIZE = 6
+        SIZE = ARRAY_LENGTH * ITEM_SIZE

 .endscope

--- a/include/text/char_name_en.inc
+++ b/include/text/char_name_en.inc
@ -6,12 +6,9 @@ CHAR_NAME_INC = 1

 .scope CharName

-; ##############################################################################
-; ###          AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY          ###
-; ##############################################################################
-
-
-; ##############################################################################
+        ARRAY_LENGTH = 64
+        ITEM_SIZE = 6
+        SIZE = ARRAY_LENGTH * ITEM_SIZE

 .endscope

--- a/include/text/char_name_jp.inc
+++ b/include/text/char_name_jp.inc
@ -6,12 +6,9 @@ CHAR_NAME_INC = 1

 .scope CharName

-; ##############################################################################
-; ###          AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY          ###
-; ##############################################################################
-
-
-; ##############################################################################
+        ARRAY_LENGTH = 64
+        ITEM_SIZE = 6
+        SIZE = ARRAY_LENGTH * ITEM_SIZE

 .endscope

--- a/include/text/char_title_jp.inc
+++ b/include/text/char_title_jp.inc
@ -6,12 +6,9 @@ CHAR_TITLE_INC = 1

 .scope CharTitle

-; ##############################################################################
-; ###          AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY          ###
-; ##############################################################################
-
-
-; ##############################################################################
+        ARRAY_LENGTH = 64
+        ITEM_SIZE = 7
+        SIZE = ARRAY_LENGTH * ITEM_SIZE

 .endscope

--- a/include/text/dance_name_en.inc
+++ b/include/text/dance_name_en.inc
@ -6,12 +6,9 @@ DANCE_NAME_INC = 1

 .scope DanceName

-; ##############################################################################
-; ###          AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY          ###
-; ##############################################################################
-
-
-; ##############################################################################
+        ARRAY_LENGTH = 8
+        ITEM_SIZE = 12
+        SIZE = ARRAY_LENGTH * ITEM_SIZE

 .endscope

--- a/include/text/dance_name_jp.inc
+++ b/include/text/dance_name_jp.inc
@ -6,12 +6,9 @@ DANCE_NAME_INC = 1

 .scope DanceName

-; ##############################################################################
-; ###          AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY          ###
-; ##############################################################################
-
-
-; ##############################################################################
+        ARRAY_LENGTH = 8
+        ITEM_SIZE = 8
+        SIZE = ARRAY_LENGTH * ITEM_SIZE

 .endscope

--- a/include/text/dlg1_en.inc
+++ b/include/text/dlg1_en.inc
@ -6,7 +6,7 @@ DLG1_INC = 1

 .scope Dlg1

-Start = Dlg1
+        Start := bank_start Dlg1

 ; ##############################################################################
 ; ###          AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY          ###
--- a/include/text/dlg1_jp.inc
+++ b/include/text/dlg1_jp.inc
@ -6,6 +6,8 @@ DLG1_INC = 1

 .scope Dlg1

+        Start := bank_start Dlg1
+
 ; ##############################################################################
 ; ###          AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY          ###
 ; ##############################################################################
--- a/include/text/dlg2_en.inc
+++ b/include/text/dlg2_en.inc
@ -6,6 +6,8 @@ DLG2_INC = 1

 .scope Dlg2

+        Start := bank_start Dlg2
+
 ; ##############################################################################
 ; ###          AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY          ###
 ; ##############################################################################
--- a/include/text/dlg2_jp.inc
+++ b/include/text/dlg2_jp.inc
@ -6,6 +6,8 @@ DLG2_INC = 1

 .scope Dlg2

+        Start := bank_start Dlg2
+
 ; ##############################################################################
 ; ###          AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY          ###
 ; ##############################################################################
--- a/include/text/genju_attack_name_en.inc
+++ b/include/text/genju_attack_name_en.inc
@ -6,12 +6,9 @@ GENJU_ATTACK_NAME_INC = 1

 .scope GenjuAttackName

-; ##############################################################################
-; ###          AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY          ###
-; ##############################################################################
-
-
-; ##############################################################################
+        ARRAY_LENGTH = 27
+        ITEM_SIZE = 10
+        SIZE = ARRAY_LENGTH * ITEM_SIZE

 .endscope

--- a/include/text/genju_attack_name_jp.inc
+++ b/include/text/genju_attack_name_jp.inc
@ -6,12 +6,9 @@ GENJU_ATTACK_NAME_INC = 1

 .scope GenjuAttackName

-; ##############################################################################
-; ###          AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY          ###
-; ##############################################################################
-
-
-; ##############################################################################
+        ARRAY_LENGTH = 27
+        ITEM_SIZE = 9
+        SIZE = ARRAY_LENGTH * ITEM_SIZE

 .endscope

--- a/include/text/genju_bonus_name_en.inc
+++ b/include/text/genju_bonus_name_en.inc
@ -6,12 +6,9 @@ GENJU_BONUS_NAME_INC = 1

 .scope GenjuBonusName

-; ##############################################################################
-; ###          AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY          ###
-; ##############################################################################
-
-
-; ##############################################################################
+        ARRAY_LENGTH = 17
+        ITEM_SIZE = 9
+        SIZE = ARRAY_LENGTH * ITEM_SIZE

 .endscope

--- a/include/text/genju_bonus_name_jp.inc
+++ b/include/text/genju_bonus_name_jp.inc
@ -6,12 +6,9 @@ GENJU_BONUS_NAME_INC = 1

 .scope GenjuBonusName

-; ##############################################################################
-; ###          AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY          ###
-; ##############################################################################
-
-
-; ##############################################################################
+        ARRAY_LENGTH = 17
+        ITEM_SIZE = 8
+        SIZE = ARRAY_LENGTH * ITEM_SIZE

 .endscope

--- a/include/text/genju_name_en.inc
+++ b/include/text/genju_name_en.inc
@ -6,12 +6,9 @@ GENJU_NAME_INC = 1

 .scope GenjuName

-; ##############################################################################
-; ###          AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY          ###
-; ##############################################################################
-
-
-; ##############################################################################
+        ARRAY_LENGTH = 27
+        ITEM_SIZE = 8
+        SIZE = ARRAY_LENGTH * ITEM_SIZE

 .endscope

--- a/include/text/genju_name_jp.inc
+++ b/include/text/genju_name_jp.inc
@ -6,12 +6,9 @@ GENJU_NAME_INC = 1

 .scope GenjuName

-; ##############################################################################
-; ###          AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY          ###
-; ##############################################################################
-
-
-; ##############################################################################
+        ARRAY_LENGTH = 27
+        ITEM_SIZE = 8
+        SIZE = ARRAY_LENGTH * ITEM_SIZE

 .endscope

--- a/include/text/item_name_en.inc
+++ b/include/text/item_name_en.inc
@ -6,12 +6,9 @@ ITEM_NAME_INC = 1

 .scope ItemName

-; ##############################################################################
-; ###          AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY          ###
-; ##############################################################################
-
-
-; ##############################################################################
+        ARRAY_LENGTH = 256
+        ITEM_SIZE = 13
+        SIZE = ARRAY_LENGTH * ITEM_SIZE

 .endscope

--- a/include/text/item_name_jp.inc
+++ b/include/text/item_name_jp.inc
@ -6,12 +6,9 @@ ITEM_NAME_INC = 1

 .scope ItemName

-; ##############################################################################
-; ###          AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY          ###
-; ##############################################################################
-
-
-; ##############################################################################
+        ARRAY_LENGTH = 256
+        ITEM_SIZE = 9
+        SIZE = ARRAY_LENGTH * ITEM_SIZE

 .endscope

--- a/include/text/item_type_name_en.inc
+++ b/include/text/item_type_name_en.inc
@ -6,12 +6,9 @@ ITEM_TYPE_NAME_INC = 1

 .scope ItemTypeName

-; ##############################################################################
-; ###          AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY          ###
-; ##############################################################################
-
-
-; ##############################################################################
+        ARRAY_LENGTH = 16
+        ITEM_SIZE = 7
+        SIZE = ARRAY_LENGTH * ITEM_SIZE

 .endscope

--- a/include/text/magic_name_en.inc
+++ b/include/text/magic_name_en.inc
@ -6,12 +6,9 @@ MAGIC_NAME_INC = 1

 .scope MagicName

-; ##############################################################################
-; ###          AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY          ###
-; ##############################################################################
-
-
-; ##############################################################################
+        ARRAY_LENGTH = 54
+        ITEM_SIZE = 7
+        SIZE = ARRAY_LENGTH * ITEM_SIZE

 .endscope

--- a/include/text/magic_name_jp.inc
+++ b/include/text/magic_name_jp.inc
@ -6,12 +6,9 @@ MAGIC_NAME_INC = 1

 .scope MagicName

-; ##############################################################################
-; ###          AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY          ###
-; ##############################################################################
-
-
-; ##############################################################################
+        ARRAY_LENGTH = 54
+        ITEM_SIZE = 5
+        SIZE = ARRAY_LENGTH * ITEM_SIZE

 .endscope

--- a/include/text/monster_dlg_en.inc
+++ b/include/text/monster_dlg_en.inc
@ -6,6 +6,8 @@ MONSTER_DLG_INC = 1

 .scope MonsterDlg

+        Start := bank_start MonsterDlg
+
 ; ##############################################################################
 ; ###          AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY          ###
 ; ##############################################################################
--- a/include/text/monster_dlg_jp.inc
+++ b/include/text/monster_dlg_jp.inc
@ -6,6 +6,8 @@ MONSTER_DLG_INC = 1

 .scope MonsterDlg

+        Start := bank_start MonsterDlg
+
 ; ##############################################################################
 ; ###          AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY          ###
 ; ##############################################################################
--- a/include/text/monster_name_en.inc
+++ b/include/text/monster_name_en.inc
@ -6,12 +6,9 @@ MONSTER_NAME_INC = 1

 .scope MonsterName

-; ##############################################################################
-; ###          AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY          ###
-; ##############################################################################
-
-
-; ##############################################################################
+        ARRAY_LENGTH = 384
+        ITEM_SIZE = 10
+        SIZE = ARRAY_LENGTH * ITEM_SIZE

 .endscope

--- a/include/text/monster_name_jp.inc
+++ b/include/text/monster_name_jp.inc
@ -6,12 +6,9 @@ MONSTER_NAME_INC = 1

 .scope MonsterName

-; ##############################################################################
-; ###          AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY          ###
-; ##############################################################################
-
-
-; ##############################################################################
+        ARRAY_LENGTH = 384
+        ITEM_SIZE = 8
+        SIZE = ARRAY_LENGTH * ITEM_SIZE

 .endscope

--- a/include/text/monster_special_name_en.inc
+++ b/include/text/monster_special_name_en.inc
@ -6,12 +6,9 @@ MONSTER_SPECIAL_NAME_INC = 1

 .scope MonsterSpecialName

-; ##############################################################################
-; ###          AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY          ###
-; ##############################################################################
-
-
-; ##############################################################################
+        ARRAY_LENGTH = 384
+        ITEM_SIZE = 10
+        SIZE = ARRAY_LENGTH * ITEM_SIZE

 .endscope

--- a/include/text/monster_special_name_jp.inc
+++ b/include/text/monster_special_name_jp.inc
@ -6,12 +6,9 @@ MONSTER_SPECIAL_NAME_INC = 1

 .scope MonsterSpecialName

-; ##############################################################################
-; ###          AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY          ###
-; ##############################################################################
-
-
-; ##############################################################################
+        ARRAY_LENGTH = 384
+        ITEM_SIZE = 8
+        SIZE = ARRAY_LENGTH * ITEM_SIZE

 .endscope

--- a/include/text/rare_item_name_en.inc
+++ b/include/text/rare_item_name_en.inc
@ -6,12 +6,9 @@ RARE_ITEM_NAME_INC = 1

 .scope RareItemName

-; ##############################################################################
-; ###          AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY          ###
-; ##############################################################################
-
-
-; ##############################################################################
+        ARRAY_LENGTH = 20
+        ITEM_SIZE = 13
+        SIZE = ARRAY_LENGTH * ITEM_SIZE

 .endscope

--- a/include/text/rare_item_name_jp.inc
+++ b/include/text/rare_item_name_jp.inc
@ -6,12 +6,9 @@ RARE_ITEM_NAME_INC = 1

 .scope RareItemName

-; ##############################################################################
-; ###          AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY          ###
-; ##############################################################################
-
-
-; ##############################################################################
+        ARRAY_LENGTH = 30
+        ITEM_SIZE = 8
+        SIZE = ARRAY_LENGTH * ITEM_SIZE

 .endscope

--- a/include/text/status_name_en.inc
+++ b/include/text/status_name_en.inc
@ -6,12 +6,9 @@ STATUS_NAME_INC = 1

 .scope StatusName

-; ##############################################################################
-; ###          AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY          ###
-; ##############################################################################
-
-
-; ##############################################################################
+        ARRAY_LENGTH = 32
+        ITEM_SIZE = 10
+        SIZE = ARRAY_LENGTH * ITEM_SIZE

 .endscope

--- a/include/text/status_name_jp.inc
+++ b/include/text/status_name_jp.inc
@ -6,12 +6,9 @@ STATUS_NAME_INC = 1

 .scope StatusName

-; ##############################################################################
-; ###          AUTOMATICALLY GENERATED CODE, DO NOT MODIFY MANUALLY          ###
-; ##############################################################################
-
-
-; ##############################################################################
+        ARRAY_LENGTH = 32
+        ITEM_SIZE = 7
+        SIZE = ARRAY_LENGTH * ITEM_SIZE

 .endscope

--- a/src/text/attack_name_en.json
+++ b/src/text/attack_name_en.json
@ -1,6 +1,5 @@
 {
  "asset_label": "AttackName",
-  "inc_path": "include/text/attack_name_en.inc",
  "item_size": 10,
  "char_tables": [
    "fixed_length_en",
--- a/src/text/attack_name_jp.json
+++ b/src/text/attack_name_jp.json
@ -1,6 +1,5 @@
 {
  "asset_label": "AttackName",
-  "inc_path": "include/text/attack_name_jp.inc",
  "item_size": 8,
  "char_tables": [
    "fixed_length_jp",
--- a/src/text/battle_cmd_name_en.json
+++ b/src/text/battle_cmd_name_en.json
@ -1,6 +1,5 @@
 {
  "asset_label": "BattleCmdName",
-  "inc_path": "include/text/battle_cmd_name_en.inc",
  "item_size": 7,
  "char_tables": [
    "fixed_length_en",
--- a/src/text/battle_cmd_name_jp.json
+++ b/src/text/battle_cmd_name_jp.json
@ -1,6 +1,5 @@
 {
  "asset_label": "BattleCmdName",
-  "inc_path": "include/text/battle_cmd_name_jp.inc",
  "item_size": 6,
  "char_tables": [
    "fixed_length_jp",
--- a/src/text/bushido_name_en.json
+++ b/src/text/bushido_name_en.json
@ -1,6 +1,5 @@
 {
  "asset_label": "BushidoName",
-  "inc_path": "include/text/bushido_name_en.inc",
  "item_size": 12,
  "char_tables": [
    "text_en",
--- a/src/text/bushido_name_jp.json
+++ b/src/text/bushido_name_jp.json
@ -1,6 +1,5 @@
 {
  "asset_label": "BushidoName",
-  "inc_path": "include/text/bushido_name_jp.inc",
  "item_size": 6,
  "char_tables": [
    "bushido_name"
--- a/src/text/char_name_en.json
+++ b/src/text/char_name_en.json
@ -1,6 +1,5 @@
 {
  "asset_label": "CharName",
-  "inc_path": "include/text/char_name_en.inc",
  "item_size": 6,
  "char_tables": [
    "text_en",
--- a/src/text/char_name_jp.json
+++ b/src/text/char_name_jp.json
@ -1,6 +1,5 @@
 {
  "asset_label": "CharName",
-  "inc_path": "include/text/char_name_jp.inc",
  "item_size": 6,
  "char_tables": [
    "fixed_length_jp",
--- a/src/text/char_title_jp.json
+++ b/src/text/char_title_jp.json
@ -1,6 +1,5 @@
 {
  "asset_label": "CharTitle",
-  "inc_path": "include/text/char_title_jp.inc",
  "item_size": 7,
  "char_tables": [
    "fixed_length_jp",
--- a/src/text/dance_name_en.json
+++ b/src/text/dance_name_en.json
@ -1,6 +1,5 @@
 {
  "asset_label": "DanceName",
-  "inc_path": "include/text/dance_name_en.inc",
  "item_size": 12,
  "char_tables": [
    "text_en",
--- a/src/text/dance_name_jp.json
+++ b/src/text/dance_name_jp.json
@ -1,6 +1,5 @@
 {
  "asset_label": "DanceName",
-  "inc_path": "include/text/dance_name_jp.inc",
  "item_size": 8,
  "char_tables": [
    "fixed_length_jp",
--- a/src/text/dte_tbl_en_new.json
+++ b/src/text/dte_tbl_en_new.json
@ -0,0 +1,137 @@
+{
+  "asset_label": "DTETbl",
+  "item_size": 2,
+  "char_tables": [
+    "dialog_en"
+  ],
+  "text": [
+    "e ",
+    " t",
+    "he",
+    "th",
+    "t ",
+    "s ",
+    "er",
+    " a",
+    ": ",
+    "re",
+    "ou",
+    "in",
+    "d ",
+    " w",
+    " h",
+    " s",
+    "an",
+    "o ",
+    "n ",
+    "r ",
+    "ha",
+    " o",
+    "to",
+    "on",
+    " i",
+    "at",
+    " m",
+    ", ",
+    "ng",
+    "ve",
+    "st",
+    "y ",
+    " b",
+    "ar",
+    "or",
+    "me",
+    "hi",
+    "yo",
+    "en",
+    "ea",
+    "ll",
+    "is",
+    " y",
+    " f",
+    "it",
+    "ed",
+    "l ",
+    "es",
+    " I",
+    "nd",
+    "le",
+    " c",
+    "se",
+    "ne",
+    "te",
+    "a ",
+    "'s",
+    "as",
+    " l",
+    "al",
+    " p",
+    "I ",
+    "g ",
+    "ur",
+    "pe",
+    "ow",
+    "om",
+    " d",
+    "Th",
+    " g",
+    "et",
+    "f ",
+    "ro",
+    "be",
+    "u ",
+    "wa",
+    "us",
+    "ut",
+    "rs",
+    " r",
+    "ri",
+    "nt",
+    "li",
+    "we",
+    "of",
+    "el",
+    "ma",
+    "ho",
+    "ee",
+    "ta",
+    " n",
+    "de",
+    "h ",
+    "oo",
+    "gh",
+    "m ",
+    "ca",
+    "ra",
+    "so",
+    "no",
+    "ti",
+    "e!",
+    "ld",
+    "ir",
+    "ay",
+    "n'",
+    "av",
+    "lo",
+    "fo",
+    "! ",
+    " E",
+    " u",
+    "ce",
+    "co",
+    " W",
+    "ac",
+    "ke",
+    "e_",
+    "e'",
+    "ch",
+    "I'",
+    " T",
+    "un",
+    "ig",
+    "il",
+    " e",
+    "ai",
+    ". "
+  ]
+}
--- a/src/text/genju_attack_name_en.json
+++ b/src/text/genju_attack_name_en.json
@ -1,6 +1,5 @@
 {
  "asset_label": "GenjuAttackName",
-  "inc_path": "include/text/genju_attack_name_en.inc",
  "item_size": 10,
  "char_tables": [
    "big_symbols_en",
--- a/src/text/genju_attack_name_jp.json
+++ b/src/text/genju_attack_name_jp.json
@ -1,6 +1,5 @@
 {
  "asset_label": "GenjuAttackName",
-  "inc_path": "include/text/genju_attack_name_jp.inc",
  "item_size": 9,
  "char_tables": [
    "fixed_length_jp",
--- a/src/text/genju_bonus_name_en.json
+++ b/src/text/genju_bonus_name_en.json
@ -1,6 +1,5 @@
 {
  "asset_label": "GenjuBonusName",
-  "inc_path": "include/text/genju_bonus_name_en.inc",
  "item_size": 9,
  "char_tables": [
    "small_symbols_en",
--- a/src/text/genju_bonus_name_jp.json
+++ b/src/text/genju_bonus_name_jp.json
@ -1,6 +1,5 @@
 {
  "asset_label": "GenjuBonusName",
-  "inc_path": "include/text/genju_bonus_name_jp.inc",
  "item_size": 8,
  "char_tables": [
    "small_symbols_jp",
--- a/src/text/genju_name_en.json
+++ b/src/text/genju_name_en.json
@ -1,6 +1,5 @@
 {
  "asset_label": "GenjuName",
-  "inc_path": "include/text/genju_name_en.inc",
  "item_size": 8,
  "char_tables": [
    "text_en",
--- a/src/text/genju_name_jp.json
+++ b/src/text/genju_name_jp.json
@ -1,6 +1,5 @@
 {
  "asset_label": "GenjuName",
-  "inc_path": "include/text/genju_name_jp.inc",
  "item_size": 8,
  "char_tables": [
    "small_symbols_jp",
--- a/src/text/item_name_en.json
+++ b/src/text/item_name_en.json
@ -1,6 +1,5 @@
 {
  "asset_label": "ItemName",
-  "inc_path": "include/text/item_name_en.inc",
  "item_size": 13,
  "char_tables": [
    "text_en",
--- a/src/text/item_name_jp.json
+++ b/src/text/item_name_jp.json
@ -1,6 +1,5 @@
 {
  "asset_label": "ItemName",
-  "inc_path": "include/text/item_name_jp.inc",
  "item_size": 9,
  "char_tables": [
    "small_symbols_jp",
--- a/src/text/item_type_name_en.json
+++ b/src/text/item_type_name_en.json
@ -1,6 +1,5 @@
 {
  "asset_label": "ItemTypeName",
-  "inc_path": "include/text/item_type_name_en.inc",
  "item_size": 7,
  "char_tables": [
    "text_en",
--- a/src/text/magic_name_en.json
+++ b/src/text/magic_name_en.json
@ -1,6 +1,5 @@
 {
  "asset_label": "MagicName",
-  "inc_path": "include/text/magic_name_en.inc",
  "item_size": 7,
  "char_tables": [
    "text_en",
--- a/src/text/magic_name_jp.json
+++ b/src/text/magic_name_jp.json
@ -1,6 +1,5 @@
 {
  "asset_label": "MagicName",
-  "inc_path": "include/text/magic_name_jp.inc",
  "item_size": 5,
  "char_tables": [
    "small_symbols_jp",
--- a/src/text/monster_name_en.json
+++ b/src/text/monster_name_en.json
@ -1,6 +1,5 @@
 {
  "asset_label": "MonsterName",
-  "inc_path": "include/text/monster_name_en.inc",
  "item_size": 10,
  "char_tables": [
    "fixed_length_en",
--- a/src/text/monster_name_jp.json
+++ b/src/text/monster_name_jp.json
@ -1,6 +1,5 @@
 {
  "asset_label": "MonsterName",
-  "inc_path": "include/text/monster_name_jp.inc",
  "item_size": 8,
  "char_tables": [
    "small_symbols_jp",
--- a/src/text/monster_special_name_en.json
+++ b/src/text/monster_special_name_en.json
@ -1,6 +1,5 @@
 {
  "asset_label": "MonsterSpecialName",
-  "inc_path": "include/text/monster_special_name_en.inc",
  "item_size": 10,
  "char_tables": [
    "fixed_length_en",
--- a/src/text/monster_special_name_jp.json
+++ b/src/text/monster_special_name_jp.json
@ -1,6 +1,5 @@
 {
  "asset_label": "MonsterSpecialName",
-  "inc_path": "include/text/monster_special_name_jp.inc",
  "item_size": 8,
  "char_tables": [
    "fixed_length_jp",
--- a/src/text/rare_item_name_en.json
+++ b/src/text/rare_item_name_en.json
@ -1,6 +1,5 @@
 {
  "asset_label": "RareItemName",
-  "inc_path": "include/text/rare_item_name_en.inc",
  "item_size": 13,
  "char_tables": [
    "text_en",
--- a/src/text/rare_item_name_jp.json
+++ b/src/text/rare_item_name_jp.json
@ -1,6 +1,5 @@
 {
  "asset_label": "RareItemName",
-  "inc_path": "include/text/rare_item_name_jp.inc",
  "item_size": 8,
  "char_tables": [
    "small_symbols_jp",
--- a/src/text/status_name_en.json
+++ b/src/text/status_name_en.json
@ -1,6 +1,5 @@
 {
  "asset_label": "StatusName",
-  "inc_path": "include/text/status_name_en.inc",
  "item_size": 10,
  "char_tables": [
    "text_en",
--- a/src/text/status_name_jp.json
+++ b/src/text/status_name_jp.json
@ -1,6 +1,5 @@
 {
  "asset_label": "StatusName",
-  "inc_path": "include/text/status_name_jp.inc",
  "item_size": 7,
  "char_tables": [
    "small_symbols_jp",
--- a/tools/char_table/big_symbols_jp.json
+++ b/tools/char_table/big_symbols_jp.json
@ -9,6 +9,5 @@
  "0xD4": "『",
  "0xD5": "（",
  "0xD6": "）",
-  "0xD7": "・",
-  "0xD8": "……"
+  "0xD7": "・"
 }
--- a/tools/char_table/kana.json
+++ b/tools/char_table/kana.json
@ -166,7 +166,11 @@
  "0xC4": "ァ",
  "0xC5": "ー",
  "0xC6": "ィ",
-  "0xC7": "…",
+  "0xC7": [
+    "＿",
+    "_",
+    "…"
+  ],
  "0xC8": "ゥ",
  "0xC9": "！",
  "0xCA": "ェ",
--- a/tools/char_table/mte.json
+++ b/tools/char_table/mte.json
@ -1,4 +1,9 @@
 {
+  "0xD8": [
+    "＿＿",
+    "__",
+    "……"
+  ],
  "0xD9": "帝国",
  "0xDA": "って",
  "0xDB": "った",
--- a/tools/encode_text.py
+++ b/tools/encode_text.py
@ -5,83 +5,6 @@ import os
 import sys
 import romtools as rt

-ASM_INDENT = ' ' * 8
-
-def encode_text(asset_def):
-
-    # create the text codec
-    char_table = {}
-    for char_table_name in asset_def['char_tables']:
-        char_table_path = os.path.join('tools', 'char_table', char_table_name + '.json')
-        with open(char_table_path, 'r', encoding='utf8') as char_table_file:
-            char_table.update(json.load(char_table_file))
-    text_codec = rt.TextCodec(char_table)
-
-    # encode each string
-    encoded_bytes = bytearray()
-    item_offsets = []
-    for text_item in asset_def['text']:
-        encoded_text = text_codec.encode_text(text_item)
-
-        if 'item_size' in asset_def:
-            # fixed length strings
-            item_size = asset_def['item_size']
-
-            # check if text is too long
-            assert len(encoded_text) <= item_size, \
-                f'Text string \"{text_item}\" too long by ' \
-                f'{len(encoded_text) - item_size} char(s)'
-
-            # pad the text
-            if len(encoded_text) != item_size:
-                assert '{pad}' in text_codec.encoding_table, \
-                    f'Padding not found in char table'
-                pad_char = text_codec.encoding_table['{pad}']
-                item_size = asset_def['item_size']
-                while len(encoded_text) < item_size:
-                    encoded_text.append(pad_char)
-
-            item_offsets.append(len(encoded_bytes))
-            encoded_bytes += encoded_text
-
-        elif 'is_sequential' in asset_def:
-            # items must be sequential, don't allow shared items
-            item_offsets.append(len(encoded_bytes))
-            encoded_bytes += encoded_text
-
-        else:
-            # allow shared items
-            shared_offset = encoded_bytes.find(encoded_text)
-            if shared_offset == -1:
-                item_offsets.append(len(encoded_bytes))
-                encoded_bytes += encoded_text
-            else:
-                item_offsets.append(shared_offset)
-
-    return item_offsets, encoded_bytes
-
-
-def update_text_inc(asset_def, item_offsets):
-
-    asset_label = asset_def['asset_label']
-
-    # define the array length
-    inc_text = ASM_INDENT + f'ARRAY_LENGTH = {len(item_offsets)}\n'
-
-    if 'item_size' in asset_def:
-        # fixed item size
-        inc_text += ASM_INDENT + f'ITEM_SIZE = '
-        inc_text += str(asset_def['item_size']) + '\n'
-    else:
-        # define item offsets
-        inc_text += '\n'
-        for id, offset in enumerate(item_offsets):
-            inc_text += ASM_INDENT + f'_%d := ' % id
-            inc_text += f'{asset_label} + $%04x\n' % offset
-
-    # update item offsets in the include file
-    rt.insert_asm(asset_def['inc_path'], inc_text)
-

 if __name__ == '__main__':

@ -91,14 +14,11 @@ if __name__ == '__main__':
    with open(asset_path, 'r', encoding='utf8') as json_file:
        asset_def = json.load(json_file)

-    item_offsets, encoded_bytes = encode_text(asset_def)
+    # encode the text
+    encoded_bytes, _ = rt.encode_text(asset_def)

    # write the encoded binary data to the data path
    asset_root, _ = os.path.splitext(asset_path)
    dat_path = asset_root + '.dat'
    with open(dat_path, 'wb') as f:
        f.write(encoded_bytes)
-
-    # if necessary, update the offsets in the include file
-    if 'inc_path' in asset_def:
-        update_text_inc(asset_def, item_offsets)
--- a/tools/extract_assets.py
+++ b/tools/extract_assets.py
@ -7,9 +7,246 @@ import romtools as rt
 from ff6_lzss import *
 from monster_stencil import apply_stencil

+class AssetExtractor:
+    '''
+    Extracts assets (raw objects, arrays, and text) from a ROM image and
+    writes them to files. Ranges are treated as closed intervals (the end
+    value is included), so slices of the ROM data use end + 1.
+    '''
+
+    def __init__(self, rom_bytes, map_mode):
+        '''
+        Store the ROM data and create the memory map that is used to map
+        asset addresses and ranges onto the ROM data.
+        '''
+        self.rom_bytes = rom_bytes
+        self.memory_map = rt.MemoryMap(map_mode)
+
+    def extract_object(self, asset_range, **kwargs):
+        '''
+        Extract the bytes in asset_range and work out the range of each item.
+
+        The item layout is selected by the first of these keyword arguments
+        that is present:
+          ptr_range     array with a pointer table (see also ptr_size,
+                        ptr_offset and is_mapped)
+          item_offsets  list of item addresses (ints or numeric strings)
+          terminator    items are separated by a terminator byte
+          item_size     fixed-length items
+        If none are present the asset is treated as a single object. The
+        optional is_sequential flag makes each item run up to the next
+        pointer in list order rather than the next pointer in sorted order.
+
+        Returns a tuple (asset_bytes, item_ranges) where each item range is
+        relative to the beginning of asset_bytes.
+        '''
+
+        # calculate the appropriate ROM range using the mapper
+        unmapped_range = rt.Range(asset_range)
+        mapped_range = self.memory_map.map_range(unmapped_range)
+
+        # extract the asset data
+        asset_bytes = self.rom_bytes[mapped_range.begin:mapped_range.end + 1]
+
+        # make a list of pointers for each item in the asset
+        pointer_list = []
+
+        if 'ptr_range' in kwargs:
+            # array with a pointer table
+            is_mapped = kwargs.get('is_mapped', False)
+            ptr_offset = kwargs.get('ptr_offset', 0)
+            if isinstance(ptr_offset, str):
+                ptr_offset = int(ptr_offset, 0)
+
+            if not is_mapped:
+                # map the pointer offset first, then add pointers
+                ptr_offset = self.memory_map.map_address(ptr_offset)
+
+            # extract the pointer table data
+            ptr_range = rt.Range(kwargs['ptr_range'])
+            ptr_range = self.memory_map.map_range(ptr_range)
+            ptr_data = self.rom_bytes[ptr_range.begin:ptr_range.end + 1]
+            ptr_size = kwargs.get('ptr_size', 2)
+            assert len(ptr_data) % ptr_size == 0, 'Pointer table length' \
+                + ' is not divisible by pointer size'
+            array_length = len(ptr_data) // ptr_size
+
+            for i in range(array_length):
+                # pointers are stored least-significant byte first
+                pointer = ptr_data[i * ptr_size]
+                if ptr_size > 1:
+                    pointer |= ptr_data[i * ptr_size + 1] << 8
+                if ptr_size > 2:
+                    pointer |= ptr_data[i * ptr_size + 2] << 16
+                if ptr_size > 3:
+                    pointer |= ptr_data[i * ptr_size + 3] << 24
+
+                pointer += ptr_offset
+                if is_mapped:
+                    # map pointer after adding pointer offset
+                    pointer = self.memory_map.map_address(pointer)
+                pointer_list.append(pointer - mapped_range.begin)
+
+        elif 'item_offsets' in kwargs:
+            # items with specified offsets
+            item_offsets = kwargs['item_offsets']
+            array_length = len(item_offsets)
+            for begin in item_offsets:
+                if isinstance(begin, str):
+                    begin = int(begin, 0)
+                begin = self.memory_map.map_address(begin)
+                pointer_list.append(begin - mapped_range.begin)
+
+        elif 'terminator' in kwargs:
+            # terminated items
+            terminator = kwargs['terminator']
+            if isinstance(terminator, str):
+                terminator = int(terminator, 0)
+            pointer_list.append(0)
+            # the final byte is not checked because a terminator there
+            # would not be followed by another item
+            for p in range(len(asset_bytes) - 1):
+                if asset_bytes[p] == terminator:
+                    pointer_list.append(p + 1)
+            array_length = len(pointer_list)
+
+        elif 'item_size' in kwargs:
+            # fixed item size
+            item_size = kwargs['item_size']
+            if isinstance(item_size, str):
+                item_size = int(item_size, 0)
+            assert len(asset_bytes) % item_size == 0, \
+                'Fixed-length array size mismatch'
+            array_length = len(asset_bytes) // item_size
+            for i in range(array_length):
+                pointer_list.append(i * item_size)
+
+        else:
+            # single object
+            pointer_list.append(0)
+            array_length = 1
+
+        # remove duplicates and sort pointers
+        sorted_pointers = sorted(set(pointer_list))
+
+        # create a list of pointer ranges (these don't correspond with item
+        # ranges for terminated and sequential items)
+        pointer_ranges = {}
+        for p, pointer in enumerate(sorted_pointers):
+            begin = pointer
+            if p == len(sorted_pointers) - 1:
+                end = len(asset_bytes) - 1
+            else:
+                end = sorted_pointers[p + 1] - 1
+            pointer_ranges[begin] = rt.Range(begin, end)
+
+        # create ranges for each item
+        item_ranges = []
+
+        for i in range(array_length):
+            begin = pointer_list[i]
+            if 'terminator' in kwargs:
+                # item range goes until terminator is found
+                end = begin
+                terminator = kwargs['terminator']
+                if isinstance(terminator, str):
+                    terminator = int(terminator, 0)
+                while end < len(asset_bytes):
+                    if asset_bytes[end] == terminator:
+                        break
+                    end = end + 1
+                item_ranges.append(rt.Range(begin, end))
+
+            elif kwargs.get('is_sequential', False):
+                if i != array_length - 1:
+                    # item range goes up to next sequential pointer
+                    end = pointer_list[i + 1] - 1
+                else:
+                    # last item goes up to end of asset range
+                    end = len(asset_bytes) - 1
+                item_ranges.append(rt.Range(begin, end))
+
+            else:
+                # otherwise, item range is same as pointer range
+                item_ranges.append(pointer_ranges[begin])
+
+        return asset_bytes, item_ranges
+
+    def write_asset_file(self, asset_bytes, asset_path):
+        '''
+        Write asset_bytes to asset_path, creating parent directories as
+        needed. If the path ends with '.lz', a decompressed copy is also
+        written to the same path without the '.lz' extension.
+        '''
+
+        # create directories (a bare file name has no directory to create,
+        # and os.makedirs raises an error for an empty path)
+        asset_dir = os.path.dirname(asset_path)
+        if asset_dir:
+            os.makedirs(asset_dir, exist_ok=True)
+
+        # decompress the data, if necessary
+        if asset_path.endswith('.lz'):
+            with open(asset_path[:-3], 'wb') as f:
+                f.write(decode_lzss(asset_bytes))
+
+        # save the raw data
+        with open(asset_path, 'wb') as f:
+            f.write(asset_bytes)
+
+    def extract_text(self, json_path, asset_range, **kwargs):
+        '''
+        Extract a text asset described by the json file at json_path. The
+        raw data is written to a '.dat' file next to the json file and the
+        decoded strings are stored in the json file's 'text' list. Nothing
+        is done if the '.dat' file already exists.
+        '''
+
+        # read the json file
+        with open(json_path, 'r', encoding='utf8') as json_file:
+            asset_def = json.load(json_file)
+
+        # the item size in the asset definition takes priority
+        if 'item_size' in asset_def:
+            kwargs['item_size'] = asset_def['item_size']
+
+        asset_root, _ = os.path.splitext(json_path)
+
+        # check if the data file already exists
+        dat_path = asset_root + '.dat'
+        if os.path.exists(dat_path):
+            return
+
+        # otherwise, we need to extract the text and create the data file
+        print(f'{asset_range} -> {json_path}')
+
+        # extract the text from the ROM
+        asset_bytes, item_ranges = self.extract_object(asset_range, **kwargs)
+
+        # write data file
+        self.write_asset_file(asset_bytes, dat_path)
+
+        # update include file
+        rt.update_array_inc(asset_bytes, item_ranges, **asset_def)
+
+        # create the text codec
+        text_codec = rt.TextCodec(asset_def)
+
+        # decode the text strings
+        text_list = []
+        for item_range in item_ranges:
+            item_bytes = asset_bytes[item_range.begin:item_range.end + 1]
+            text_list.append(text_codec.decode(item_bytes))
+
+        asset_def['text'] = text_list
+
+        # write text strings to the asset file
+        asset_json = json.dumps(asset_def, ensure_ascii=False, indent=2)
+        with open(json_path, 'w', encoding='utf8') as f:
+            f.write(asset_json)
+
+    def extract_array(self, file_path, asset_range, **kwargs):
+        '''
+        Extract an array asset to a single data file at file_path. Nothing
+        is done if the data file already exists.
+        '''
+
+        # check if the data file already exists before reading the ROM
+        if os.path.exists(file_path):
+            return
+
+        # extract the array data from the ROM
+        asset_bytes, item_ranges = self.extract_object(asset_range, **kwargs)
+
+        # write data file
+        print(f'{asset_range} -> {file_path}')
+        self.write_asset_file(asset_bytes, file_path)
+
+        # pass the array layout on to the include file updater
+        rt.update_array_inc(asset_bytes, item_ranges, **kwargs)
+
+    def extract_asset(self, file_path, asset_range, **kwargs):
+        '''
+        Extract each item of an asset to its own file. Every '%s' in
+        file_path is replaced with the item's name from the file_list
+        keyword argument, or with the item index as four hex digits if no
+        file list is given. Existing files and empty items are skipped.
+        '''
+
+        # extract the asset from the ROM
+        asset_bytes, item_ranges = self.extract_object(asset_range, **kwargs)
+
+        # generate a list of file names
+        if 'file_list' in kwargs:
+            file_list = kwargs['file_list']
+            assert len(file_list) == len(item_ranges)
+        else:
+            file_list = [('%04x' % i) for i in range(len(item_ranges))]
+        path_list = [
+            file_path.replace('%s', file_list[i])
+            for i in range(len(item_ranges))
+        ]
+
+        # only print the asset range once, and only if something is extracted
+        extracted_one = False
+        for i, item_range in enumerate(item_ranges):
+            if os.path.exists(path_list[i]):
+                continue
+            if item_range.is_empty() or item_range.begin < 0:
+                continue
+            if not extracted_one:
+                extracted_one = True
+                print(f'{asset_range} -> {file_path}')
+            gfx_bytes = asset_bytes[item_range.begin:item_range.end + 1]
+            self.write_asset_file(gfx_bytes, path_list[i])
+
+
 def extract_rom(rom_bytes, language):

-    ae = rt.AssetExtractor(rom_bytes, 'hirom')
+    ae = AssetExtractor(rom_bytes, 'hirom')

    # load rip info
    rip_list_path = os.path.join('tools', f'rip_list_{language}.json')
@ -66,7 +303,6 @@ def extract_rom(rom_bytes, language):


 if __name__ == '__main__':
-    memory_map = rt.MemoryMap('hirom')

    # search the vanilla directory for valid ROM files
    dir_list = os.listdir('vanilla')
--- a/tools/fix_dlg.py
+++ b/tools/fix_dlg.py
@ -2,18 +2,97 @@

 import romtools as rt
 import os
+import re
 import sys
 import json
-from encode_text import encode_text, update_text_inc
+
+ESCAPE_REGEX = r'{(\w+)(?:\:(\w+))?}'
+
+def escape_len(code):
+    '''
+    Return the number of bytes to skip for an escape code string.
+
+    An escape code with a ':b' parameter counts as 2 bytes and one with a
+    ':w' parameter counts as 3 bytes. Anything else (no parameter, an
+    unrecognized parameter, or a string that is not an escape code) counts
+    as 1 byte.
+    '''
+    param_match = re.match(ESCAPE_REGEX, code)
+    if param_match is None:
+        return 1
+
+    # group 2 is the parameter type (None if there is no parameter)
+    return {'b': 2, 'w': 3}.get(param_match.group(2), 1)
+
+
+def optimize_dte(dlg_def):
+    '''
+    Rebuild the DTE (dual-tile encoding) table from the most common
+    character pairs in the dialogue.
+
+    The dialogue in dlg_def is encoded without the 'dte' char table, pairs
+    of adjacent non-escape characters are counted, and up to 128 of the
+    most common pairs are written to tools/char_table/dte.json (with keys
+    from rt.hex_string(128) upward) and to the 'text' list of
+    src/text/dte_tbl_en.json.
+
+    Note that 'dte' is removed from dlg_def['char_tables'] in place. If
+    there is no 'dte' char table, a message is printed and nothing else is
+    done.
+    '''
+    from collections import Counter
+
+    # make sure there is a DTE table
+    char_tables = dlg_def['char_tables']
+    if 'dte' not in char_tables:
+        print("Can't optimize DTE")
+        return
+
+    # create a text codec without DTE (the list is modified in place, so
+    # dlg_def sees the change)
+    char_tables.remove('dte')
+
+    # encode all of the dialogue
+    dlg_bytes, _ = rt.encode_text(dlg_def)
+
+    # find all valid pairs of characters
+    text_codec = rt.TextCodec(dlg_def)
+    decoding_table = text_codec.decoding_table
+    dte_pairs = Counter()
+
+    # stop at the second to last byte so that the final pair is included
+    i = 0
+    while i < len(dlg_bytes) - 1:
+        first_char = decoding_table[dlg_bytes[i]]
+        if first_char[0] == '{':
+            # skip the escape code and its parameter
+            i += escape_len(first_char)
+            continue
+
+        second_char = decoding_table[dlg_bytes[i + 1]]
+        if second_char[0] == '{':
+            # skip the first character as well as the escape code
+            i += escape_len(second_char) + 1
+            continue
+
+        # pairs are counted without overlapping
+        dte_pairs[first_char + second_char] += 1
+        i += 2
+
+    # choose the 128 most common pairs (or fewer, if there aren't that many)
+    top_pairs = dte_pairs.most_common(128)
+
+    dte_char_table = {}
+    print('Most common char pairs:')
+    for index, (pair, count) in enumerate(top_pairs):
+        dte_char_table[rt.hex_string(index + 128)] = pair
+        print(pair, count)
+
+    # update the dte char table (the json is not ascii-escaped, so the
+    # file encoding must be specified)
+    dte_char_table_path = os.path.join('tools', 'char_table', 'dte.json')
+    with open(dte_char_table_path, 'w', encoding='utf8') as dte_char_table_file:
+        dte_char_table_file.write(json.dumps(dte_char_table, ensure_ascii=False, indent=2))
+
+    # update the dte text file
+    dte_list = [pair for pair, _ in top_pairs]
+    dte_json_path = os.path.join('src', 'text', 'dte_tbl_en.json')
+    with open(dte_json_path, 'r', encoding='utf8') as dte_json_file:
+        dte_json = json.load(dte_json_file)
+    dte_json['text'] = dte_list
+    with open(dte_json_path, 'w', encoding='utf8') as dte_json_file:
+        dte_json_file.write(json.dumps(dte_json, ensure_ascii=False, indent=2))


 def split_dlg(dlg1_def, dlg2_def):

    # find the first dialog offset beyond the first bank
-    item_offsets, _ = encode_text(dlg1_def)
+    _, item_ranges = rt.encode_text(dlg1_def)
    bank_inc = len(dlg1_def['text'])
-    for index, offset in enumerate(item_offsets):
-        if offset >= 0x010000:
+    for index, range in enumerate(item_ranges):
+        if range.begin >= 0x010000:
            bank_inc = index
            break

@ -48,6 +127,11 @@ if __name__ == '__main__':
    if dlg_cmd == 'split':
        split_dlg(dlg1_def, dlg2_def)

+        # save both dialogue json files
+        with open(dlg1_path, 'w', encoding='utf8') as dlg1_file, open(dlg2_path, 'w', encoding='utf8') as dlg2_file:
+            dlg1_file.write(json.dumps(dlg1_def, ensure_ascii=False, indent=2))
+            dlg2_file.write(json.dumps(dlg2_def, ensure_ascii=False, indent=2))
+
    elif dlg_cmd == 'combine':
        combine_dlg(dlg1_def, dlg2_def)

@ -55,10 +139,16 @@ if __name__ == '__main__':
        os.utime(dlg1_dat_path)
        os.utime(dlg2_dat_path)

+        # save both dialogue json files
+        with open(dlg1_path, 'w', encoding='utf8') as dlg1_file, open(dlg2_path, 'w', encoding='utf8') as dlg2_file:
+            dlg1_file.write(json.dumps(dlg1_def, ensure_ascii=False, indent=2))
+            dlg2_file.write(json.dumps(dlg2_def, ensure_ascii=False, indent=2))
+
+    elif dlg_cmd == 'dte':
+        optimize_dte(dlg1_def)
+        os.utime(dlg1_path)
+        os.utime(dlg2_path)
+
    else:
        raise ValueError('Invalid command:', dlg_cmd)

-    # save both dialogue json files
-    with open(dlg1_path, 'w', encoding='utf8') as dlg1_file, open(dlg2_path, 'w', encoding='utf8') as dlg2_file:
-        dlg1_file.write(json.dumps(dlg1_def, ensure_ascii=False, indent=2))
-        dlg2_file.write(json.dumps(dlg2_def, ensure_ascii=False, indent=2))
--- a/tools/optimize_dte.py
+++ b/tools/optimize_dte.py
@ -1,11 +0,0 @@
-#!/usr/bin/env python3
-
-import os
-import sys
-import romtools as rt
-
-# def optimize_dte():
-
-if __name__ == '__main__':
-
-    dlg_path = sys.argv[1]
--- a/tools/romtools/init.py
+++ b/tools/romtools/init.py
@ -1,7 +1,7 @@
 from romtools.range import Range
 from romtools.memory_map import MemoryMap
-from romtools.asset_extractor import AssetExtractor
-from romtools.text_codec import TextCodec
+from romtools.text_codec import TextCodec, encode_text
 from romtools.hex_string import hex_string
 from romtools.bytes_to_asm import bytes_to_asm
 from romtools.insert_asm import insert_asm
+from romtools.update_array_inc import update_array_inc
--- a/tools/romtools/asset_extractor.py
+++ b/tools/romtools/asset_extractor.py
@ -1,285 +0,0 @@
-import romtools as rt
-import json
-import os
-from ff6_lzss import *
-
-ASM_INDENT = ' ' * 8
-
-class AssetExtractor:
-
-    def __init__(self, rom_bytes, map_mode):
-        self.rom_bytes = rom_bytes
-        self.memory_map = rt.MemoryMap(map_mode)
-
-    def extract_object(self, asset_range, **kwargs):
-
-        # calculate the appropriate ROM range using the mapper
-        unmapped_range = rt.Range(asset_range)
-        mapped_range = self.memory_map.map_range(unmapped_range)
-
-        # extract the asset data
-        asset_bytes = self.rom_bytes[mapped_range.begin:mapped_range.end + 1]
-
-        # make a list of pointers for each item in the asset
-        pointer_list = []
-
-        if 'ptr_range' in kwargs:
-            # array with a pointer table
-            is_mapped = kwargs.get('is_mapped', False)
-            ptr_offset = kwargs.get('ptr_offset', 0)
-            if isinstance(ptr_offset, str):
-                ptr_offset = int(ptr_offset, 0)
-
-            if not is_mapped:
-                # map the pointer offset first, then add pointers
-                ptr_offset = self.memory_map.map_address(ptr_offset)
-
-            # extract the pointer table data
-            ptr_range = rt.Range(kwargs['ptr_range'])
-            ptr_range = self.memory_map.map_range(ptr_range)
-            ptr_data = self.rom_bytes[ptr_range.begin:ptr_range.end + 1]
-            ptr_size = kwargs.get('ptr_size', 2)
-            assert len(ptr_data) % ptr_size == 0, 'Pointer table length' \
-                + ' is not divisible by pointer size'
-            array_length = len(ptr_data) // ptr_size
-
-            for i in range(array_length):
-                pointer = ptr_data[i * ptr_size]
-                if ptr_size > 1:
-                    pointer |= ptr_data[i * ptr_size + 1] << 8
-                if ptr_size > 2:
-                    pointer |= ptr_data[i * ptr_size + 2] << 16
-                if ptr_size > 3:
-                    pointer |= ptr_data[i * ptr_size + 3] << 24
-
-                pointer += ptr_offset
-                if is_mapped:
-                    # map pointer after adding pointer offset
-                    pointer = self.memory_map.map_address(pointer)
-                pointer_list.append(pointer - mapped_range.begin)
-
-        elif 'item_offsets' in kwargs:
-            # items with specified offsets
-            item_offsets = kwargs['item_offsets']
-            array_length = len(item_offsets)
-            for begin in item_offsets:
-                if isinstance(begin, str):
-                    begin = int(begin, 0)
-                begin = self.memory_map.map_address(begin)
-                pointer_list.append(begin - mapped_range.begin)
-
-        elif 'terminator' in kwargs:
-            # terminated items
-            terminator = kwargs['terminator']
-            if isinstance(terminator, str):
-                terminator = int(terminator, 0)
-            pointer_list.append(0)
-            for p in range(len(asset_bytes) - 1):
-                if asset_bytes[p] == terminator:
-                    pointer_list.append(p + 1)
-            array_length = len(pointer_list)
-
-        elif 'item_size' in kwargs:
-            # fixed item size
-            item_size = kwargs['item_size']
-            if isinstance(item_size, str):
-                item_size = int(item_size, 0)
-            assert len(asset_bytes) % item_size == 0, \
-                'Fixed-length array size mismatch'
-            array_length = len(asset_bytes) // item_size
-            for i in range(array_length):
-                pointer_list.append(i * item_size)
-
-        else:
-            # single object
-            pointer_list.append(0)
-            array_length = 1
-
-        # remove duplicates and sort pointers
-        sorted_pointers = sorted(set(pointer_list))
-
-        # create a list of pointer ranges (these don't correspond with item
-        # ranges for terminated and sequential items)
-        pointer_ranges = {}
-        for p, pointer in enumerate(sorted_pointers):
-            begin = pointer
-            if p == len(sorted_pointers) - 1:
-                end = len(asset_bytes) - 1
-            else:
-                end = sorted_pointers[p + 1] - 1
-            pointer_ranges[begin] = rt.Range(begin, end)
-
-        # create ranges for each item
-        item_ranges = []
-
-        for i in range(array_length):
-            begin = pointer_list[i]
-            if 'terminator' in kwargs:
-                # item range goes until terminator is found
-                end = begin
-                terminator = kwargs['terminator']
-                if isinstance(terminator, str):
-                    terminator = int(terminator, 0)
-                while end < len(asset_bytes):
-                    if asset_bytes[end] == terminator:
-                        break
-                    end = end + 1
-                item_ranges.append(rt.Range(begin, end))
-
-            elif kwargs.get('is_sequential', False):
-                if i != array_length - 1:
-                    # item range goes up to next sequential pointer
-                    end = pointer_list[i + 1] - 1
-                else:
-                    # last item goes up to end of asset range
-                    end = len(asset_bytes) - 1
-                item_ranges.append(rt.Range(begin, end))
-
-            else:
-                # otherwise, item range is same as pointer range
-                item_ranges.append(pointer_ranges[begin])
-
-        return asset_bytes, item_ranges
-
-    def write_asset_file(self, asset_bytes, asset_path):
-
-        # create directories
-        os.makedirs(os.path.dirname(asset_path), exist_ok=True)
-
-        # decompress the data, if necessary
-        if asset_path.endswith('.lz'):
-            with open(asset_path[:-3], 'wb') as f:
-                f.write(decode_lzss(asset_bytes))
-
-        # save the raw data
-        with open(asset_path, 'wb') as f:
-            f.write(asset_bytes)
-
-    def extract_text(self, json_path, asset_range, **kwargs):
-
-        # read the json file
-        with open(json_path, 'r', encoding='utf8') as json_file:
-            asset_def = json.load(json_file)
-
-        if 'item_size' in asset_def:
-            kwargs['item_size'] = asset_def['item_size']
-
-        asset_label = asset_def['asset_label']
-        asset_root, _ = os.path.splitext(json_path)
-
-        # check if the data file already exists
-        dat_path = asset_root + '.dat'
-        if os.path.exists(dat_path):
-            return
-
-        # extract the text from the ROM
-        asset_bytes, item_ranges = self.extract_object(asset_range, **kwargs)
-
-        # write data file
-        print(f'{asset_range} -> {json_path}')
-        self.write_asset_file(asset_bytes, dat_path)
-
-        # check if an include file exists
-        if 'inc_path' in asset_def:
-            inc_path = asset_def['inc_path']
-            assert os.path.exists(inc_path), f'Missing include file: {inc_path}'
-
-            # define the size
-            inc_text = ASM_INDENT + f'SIZE = {len(asset_bytes)}\n'
-
-            # define the array length
-            inc_text += ASM_INDENT + f'ARRAY_LENGTH = {len(item_ranges)}\n'
-
-            if 'item_size' in asset_def:
-                # fixed item size
-                inc_text += ASM_INDENT + f'ITEM_SIZE = '
-                inc_text += str(asset_def['item_size']) + '\n'
-            else:
-                # define item offsets
-                inc_text += '\n'
-                for id, item_range in enumerate(item_ranges):
-                    inc_text += ASM_INDENT + '_%d := ' % id
-                    inc_text += f'{asset_label} + $%04x\n' % item_range.begin
-
-            # update item offsets in the include file
-            rt.insert_asm(inc_path, inc_text)
-
-        # create the text codec
-        char_table = {}
-        for char_table_name in asset_def['char_tables']:
-            char_table_path = 'tools/char_table/' + char_table_name + '.json'
-            with open(char_table_path, 'r', encoding='utf8') as char_table_file:
-                char_table.update(json.load(char_table_file))
-        text_codec = rt.TextCodec(char_table)
-
-        # decode the text strings
-        text_list = []
-        for item_range in item_ranges:
-            item_bytes = asset_bytes[item_range.begin:item_range.end + 1]
-            text_list.append(text_codec.decode_text(item_bytes))
-
-        asset_def['text'] = text_list
-
-        # write text strings to the asset file
-        asset_json = json.dumps(asset_def, ensure_ascii=False, indent=2)
-        with open(json_path, 'w', encoding='utf8') as f:
-            f.write(asset_json)
-
-    def extract_array(self, file_path, inc_path, asset_range, asset_label, **kwargs):
-
-        # extract the array data from the ROM
-        asset_bytes, item_ranges = self.extract_object(asset_range, **kwargs)
-
-        if os.path.exists(file_path):
-            return
-
-        # write data file
-        print(f'{asset_range} -> {file_path}')
-        self.write_asset_file(asset_bytes, file_path)
-
-        # check if an include file exists
-        if os.path.exists(inc_path):
-
-            # define the size
-            inc_text = ASM_INDENT + f'SIZE = {len(asset_bytes)}\n'
-
-            # define the array length
-            inc_text += ASM_INDENT + f'ARRAY_LENGTH = {len(item_ranges)}\n'
-
-            # define item offsets
-            inc_text += '\n'
-            for id, item_range in enumerate(item_ranges):
-                inc_text += ASM_INDENT + '_%d := ' % id
-                inc_text += f'{asset_label} + $%04x\n' % item_range.begin
-
-            # update item offsets in the include file
-            rt.insert_asm(inc_path, inc_text)
-
-
-    def extract_asset(self, file_path, asset_range, **kwargs):
-
-        # extract the asset from the ROM
-        asset_bytes, item_ranges = self.extract_object(asset_range, **kwargs)
-
-        # generate a list of file names
-        if 'file_list' in kwargs:
-            file_list = kwargs['file_list']
-            assert len(file_list) == len(item_ranges)
-        else:
-            file_list = [('%04x' % i) for i in range(len(item_ranges))]
-        path_list = [
-            file_path.replace('%s', file_list[i])
-            for i in range(len(item_ranges))
-        ]
-
-        extracted_one = False
-        for i, item_range in enumerate(item_ranges):
-            if os.path.exists(path_list[i]):
-                continue
-            if item_range.is_empty() or item_range.begin < 0:
-                continue
-            if not extracted_one:
-                extracted_one = True
-                print(f'{asset_range} -> {file_path}')
-            gfx_bytes = asset_bytes[item_range.begin:item_range.end + 1]
-            self.write_asset_file(gfx_bytes, path_list[i])
--- a/tools/romtools/bytes_to_asm.py
+++ b/tools/romtools/bytes_to_asm.py
@ -1,5 +1,10 @@
 import romtools as rt

+'''
+Converts a block of binary data into a string that can be interpreted by
+the ca65 assembler. Labels can be inserted and referenced by symbols to
+generate relocatable code.
+'''

 def bytes_to_asm(bytes, labels=None, symbols=None, line_width=16):
    asm_string = ''
@ -51,7 +56,7 @@ def bytes_to_asm(bytes, labels=None, symbols=None, line_width=16):
            # determine the value size
            if type == '.byte':
                width = 1
-            if type == '.word' or type == '.addr':
+            elif type == '.word' or type == '.addr':
                width = 2
            elif type == '.faraddr':
                width = 3
--- a/tools/romtools/hex_string.py
+++ b/tools/romtools/hex_string.py
@ -1,3 +1,9 @@
+'''
+Convert an integer into a string in hexadecimal format. The output string can
+optionally be padded with zeros, and a prefix can optionally be added to the
+front of the string.
+'''
+
 def hex_string(num, pad=None, prefix='0x'):
    if pad is not None:
        pad = int(pad)
--- a/tools/romtools/memory_map.py
+++ b/tools/romtools/memory_map.py
@ -1,5 +1,12 @@
 import romtools as rt

+'''
+Helper class to convert ROM file addresses between the address space of the
+ROM file and the memory mapper address space used by the console CPU. Because
+memory mapper address spaces are often non-contiguous at bank boundaries,
+ranges should be specified as closed intervals (end value is included). This
+is in contrast to the behavior of Python functions like range, etc.
+'''

 class MemoryMap:

--- a/tools/romtools/range.py
+++ b/tools/romtools/range.py
@ -1,5 +1,8 @@
 import romtools as rt

+'''
+Closed-interval range object suitable for defining regions in a ROM file.
+'''

 class Range:

--- a/tools/romtools/text_codec.py
+++ b/tools/romtools/text_codec.py
@ -1,13 +1,27 @@
 import re
+import json
 import romtools as rt

+'''
+Helper object for encoding and decoding ROM text. Text characters can map to
+one- or two-byte codes, and multiple text characters can map to the same code
+value (with the first value listed in the character map being the default).
+Text strings can include escape codes enclosed in braces "{}". Escape codes
+can optionally be followed by a one- or two-byte parameter.
+'''

 ESCAPE_REGEX = r'{(\w+)(?:\:(\w+))?}'

-
 class TextCodec:

-    def __init__(self, char_table):
+    def __init__(self, asset_def):
+
+        # create the character table
+        char_table = {}
+        for char_table_name in asset_def['char_tables']:
+            char_table_path = 'tools/char_table/' + char_table_name + '.json'
+            with open(char_table_path, 'r', encoding='utf8') as char_table_file:
+                char_table.update(json.load(char_table_file))

        # merge multiple character tables into a single list
        if isinstance(char_table, list):
@ -49,13 +63,12 @@ class TextCodec:
                self.encoding_table[value] = code

            assert isinstance(primary_value, str)
-
            self.decoding_table[code] = primary_value

        self.encoding_keys = self.encoding_table.keys()
        self.terminator_code = self.encoding_table.get('{0}')

-    def decode_text(self, text_bytes):
+    def decode(self, text_bytes):
        text = ''
        i = 0

@ -118,7 +131,7 @@ class TextCodec:

        return text

-    def encode_text(self, text_str):
+    def encode(self, text_str):
        i = 0
        key_list = self.encoding_table.keys()
        text_codes = []
@ -233,3 +246,58 @@ class TextCodec:
                    i += int(escape_match.group(1))

        return min(i, len(text_bytes))
+
+def encode_text(asset_def):
+    '''
+    Encode every string in asset_def['text'] with a TextCodec built from
+    asset_def. Returns (encoded_bytes, item_ranges): the concatenated
+    bytearray and one rt.Range per string, in input order. Also passes
+    both to rt.update_array_inc to update the include file.
+
+    Layout depends on the keys present in asset_def:
+      'item_size'     - every item is padded with '{pad}' to that size
+      'is_sequential' - items are appended back to back
+      neither         - an item whose bytes already occur in the output
+                        shares them instead of being appended again
+    '''
+
+    text_codec = TextCodec(asset_def)
+    is_fixed = 'item_size' in asset_def
+    allow_shared = not is_fixed and 'is_sequential' not in asset_def
+
+    # encode each string
+    encoded_bytes = bytearray()
+    item_ranges = []
+    for text_item in asset_def['text']:
+        encoded_text = text_codec.encode(text_item)
+        item_size = len(encoded_text)
+
+        if is_fixed:
+            item_size = asset_def['item_size']
+
+            # positive when the text does not fit, negative when it is short
+            excess = len(encoded_text) - item_size
+            assert excess <= 0, \
+                f'Text string \"{text_item}\" too long by {excess} char(s)'
+
+            if excess < 0:
+                assert '{pad}' in text_codec.encoding_table, \
+                    'Padding not found in char table'
+                pad_char = text_codec.encoding_table['{pad}']
+                encoded_text.extend([pad_char] * -excess)
+
+        # only the shared layout searches for an existing copy of the item
+        item_offset = encoded_bytes.find(encoded_text) if allow_shared else -1
+        if item_offset == -1:
+            item_offset = len(encoded_bytes)
+            encoded_bytes += encoded_text
+
+        # NOTE(review): rt.Range is described as a closed interval, but the
+        # end passed here is one past the item's last byte - confirm which
+        # convention the consumers of item_ranges expect.
+        item_ranges.append(rt.Range(item_offset, item_offset + item_size))
+
+    # update the include file
+    rt.update_array_inc(encoded_bytes, item_ranges, **asset_def)
+
+    return encoded_bytes, item_ranges
--- a/tools/romtools/update_array_inc.py
+++ b/tools/romtools/update_array_inc.py
@ -0,0 +1,40 @@
+import os
+import romtools as rt
+
+'''
+Write the length, size, and item offsets of an array into an include file.
+'''
+
+ASM_INDENT = ' ' * 8  # indentation prepended to each generated asm line
+
+def update_array_inc(asset_bytes, item_ranges, **kwargs):
+    '''
+    Insert ARRAY_LENGTH/SIZE definitions (plus ITEM_SIZE for fixed-size
+    items, or one "_<index>" offset label per item otherwise) into the
+    include file kwargs['inc_path']. Does nothing without 'inc_path'.
+    '''
+    if 'inc_path' not in kwargs:
+        return
+
+    inc_path = kwargs['inc_path']
+    assert os.path.exists(inc_path), f'Missing include file: {inc_path}'
+
+    assert 'asset_label' in kwargs, 'Missing asset_label'
+    asset_label = kwargs['asset_label']
+
+    # define the number of items in the array
+    inc_lines = [f'ARRAY_LENGTH = {len(item_ranges)}\n']
+
+    if 'item_size' in kwargs:
+        # fixed item size
+        inc_lines.append(f"ITEM_SIZE = {kwargs['item_size']}\n")
+        inc_lines.append('SIZE = ARRAY_LENGTH * ITEM_SIZE\n')
+    else:
+        # variable item size: total size, then one offset label per item
+        inc_lines.append(f'SIZE = {len(asset_bytes)}\n\n')
+        for index, item_range in enumerate(item_ranges):
+            label = f'_{index} := {asset_label} + ${item_range.begin:04x}\n'
+            inc_lines.append(label)
+
+    # update the include file, indenting every generated line
+    rt.insert_asm(inc_path, ''.join(ASM_INDENT + s for s in inc_lines))