decoder: add ir opcode to metadata struct

Instead of using strcmp() on each decoded instruction's mnemonic to
translate it, we embed an IR opcode into the struct. This is a very
barebones implementation and does not cover the entire ARM instruction
set. ARM instructions that do not have an IR opcode equivalent will be
marked with `OPCODE_TRAP` and should be implemented in the future.

Signed-off-by: Ronald Caesar <github43132@proton.me>
This commit is contained in:
Ronald Caesar
2026-01-13 23:31:06 -04:00
parent f910745a55
commit f72da3e121
4 changed files with 13281 additions and 13203 deletions

View File

@@ -10,6 +10,7 @@
#define BAL_DECODER_H #define BAL_DECODER_H
#include "bal_attributes.h" #include "bal_attributes.h"
#include "bal_types.h"
#include <stdint.h> #include <stdint.h>
#ifdef __cplusplus #ifdef __cplusplus
@@ -17,12 +18,15 @@ extern "C"
{ {
#endif #endif
/*! @brief Represents static metadata associated with a specific ARM32 /*!
* instruction. */ * @brief Represents static metadata associated with a specific ARM32
typedef struct * instruction.
*/
BAL_ALIGNED(32) typedef struct
{ {
/*!
/*! @brief The instruction mnemonic (e.g., "ADD", "LDR"). */ * @brief The instruction mnemonic (e.g., "ADD", "LDR").
*/
const char *name; const char *name;
/*! /*!
@@ -39,6 +43,13 @@ extern "C"
* @details (instruction & mask) == expected. * @details (instruction & mask) == expected.
*/ */
uint32_t expected; uint32_t expected;
/*!
* @brief The IR opcode equivalent to this instruction's mnemonic.
*/
bal_opcode_t ir_opcode;
char _pad[8];
} bal_decoder_instruction_metadata_t; } bal_decoder_instruction_metadata_t;
/*! /*!

View File

@@ -18,6 +18,24 @@ typedef enum
OPCODE_CONST, OPCODE_CONST,
OPCODE_MOV, OPCODE_MOV,
OPCODE_ADD, OPCODE_ADD,
OPCODE_SUB,
OPCODE_MUL,
OPCODE_DIV,
OPCODE_AND,
OPCODE_XOR,
OPCODE_OR_NOT,
OPCODE_SHIFT,
OPCODE_LOAD,
OPCODE_STORE,
OPCODE_JUMP,
OPCODE_CALL,
OPCODE_RETURN,
OPCODE_BRANCH_ZERO,
OPCODE_BRANCH_NOT_ZERO,
OPCODE_TEST_BIT_ZERO,
OPCODE_CMP,
OPCODE_CMP_COND,
OPCODE_TRAP,
OPCODE_EMUM_END = 0x7FF, // Force enum to 2 bytes. OPCODE_EMUM_END = 0x7FF, // Force enum to 2 bytes.
} bal_opcode_t; } bal_opcode_t;

File diff suppressed because it is too large Load Diff

View File

@@ -28,7 +28,6 @@ DEFAULT_XML_DIRECTORY_PATH = "../spec/arm64_xml/"
DECODER_HEADER_NAME = "bal_decoder.h" DECODER_HEADER_NAME = "bal_decoder.h"
DECODER_METADATA_STRUCT_NAME = "bal_decoder_instruction_metadata_t" DECODER_METADATA_STRUCT_NAME = "bal_decoder_instruction_metadata_t"
# There is no prefix because this struct will not be public
DECODER_HASH_TABLE_BUCKET_STRUCT_NAME = "decoder_bucket_t" DECODER_HASH_TABLE_BUCKET_STRUCT_NAME = "decoder_bucket_t"
DECODER_ARM64_INSTRUCTIONS_SIZE_NAME = "BAL_DECODER_ARM64_INSTRUCTIONS_SIZE" DECODER_ARM64_INSTRUCTIONS_SIZE_NAME = "BAL_DECODER_ARM64_INSTRUCTIONS_SIZE"
@@ -230,6 +229,51 @@ def parse_xml_file(filepath: str) -> List[A64Instruction]:
return list(instructions.values()) return list(instructions.values())
def derive_opcode(mnemonic: str) -> str:
    """Map an ARM mnemonic to the name of a Ballistic IR opcode.

    The mnemonic is upper-cased before matching, so the lookup is
    case-insensitive. Rules are evaluated top to bottom and the first match
    wins; several rules are prefix matches and therefore cover a whole
    family of mnemonics.

    Args:
        mnemonic: The instruction mnemonic, e.g. "ADD" or "ldr".

    Returns:
        The IR opcode enumerator name as a string (e.g. "OPCODE_ADD").
        "OPCODE_TRAP" is returned when no rule matches.
    """
    m = mnemonic.upper()

    if m in ("MOVZ", "MOVN"):
        return "OPCODE_CONST"

    # ORR is often used for register moves.
    if m in ("ORR", "MOV"):
        return "OPCODE_MOV"

    # NOTE(review): the prefix rules below are deliberately broad and match
    # every mnemonic that merely begins with the given text. Confirm against
    # the parsed XML that no unrelated mnemonic is swept in by them.
    if m.startswith("ADD"):
        return "OPCODE_ADD"
    if m.startswith("SUB"):
        return "OPCODE_SUB"
    if m.startswith(("MUL", "MADD")):
        return "OPCODE_MUL"
    if m.startswith(("SDIV", "UDIV")):
        return "OPCODE_DIV"
    if m.startswith("AND"):
        return "OPCODE_AND"
    if m.startswith("EOR"):
        return "OPCODE_XOR"

    # ORR is handled above, but ORN (Or Not) is distinct.
    if m.startswith("ORN"):
        return "OPCODE_OR_NOT"

    if m in ("LSL", "LSR", "ASR", "ROR"):
        return "OPCODE_SHIFT"

    # Match the unscaled-offset family on "LDUR"/"STUR" rather than on the
    # shorter "LDU": the shorter prefix also captures the atomic
    # read-modify-write mnemonics LDUMAX*/LDUMIN*, which are not plain loads
    # and must fall through to OPCODE_TRAP.
    if m.startswith(("LDR", "LDUR", "LDP")):
        return "OPCODE_LOAD"
    if m.startswith(("STR", "STUR", "STP")):
        return "OPCODE_STORE"

    if m == "B":
        return "OPCODE_JUMP"
    if m == "BL":
        return "OPCODE_CALL"
    if m == "RET":
        return "OPCODE_RETURN"

    if m == "CBZ":
        return "OPCODE_BRANCH_ZERO"
    if m == "CBNZ":
        return "OPCODE_BRANCH_NOT_ZERO"
    if m.startswith("TBZ"):
        return "OPCODE_TEST_BIT_ZERO"

    if m.startswith(("CMP", "CMN")):
        return "OPCODE_CMP"
    if m.startswith("CCMP"):
        return "OPCODE_CMP_COND"

    # If we don't know what it is, map it to TRAP.
    return "OPCODE_TRAP"
def generate_hash_table(instructions: List[A64Instruction]) -> Dict[int, List[A64Instruction]]: def generate_hash_table(instructions: List[A64Instruction]) -> Dict[int, List[A64Instruction]]:
buckets: Dict[int, List[A64Instruction]] = {i: [] for i in range(DECODER_HASH_TABLE_SIZE)} buckets: Dict[int, List[A64Instruction]] = {i: [] for i in range(DECODER_HASH_TABLE_SIZE)}
@@ -368,12 +412,15 @@ if __name__ == "__main__":
f.write(f"{GENERATED_FILE_WARNING}\n\n") f.write(f"{GENERATED_FILE_WARNING}\n\n")
f.write(f"/* Generated {len(all_instructions)} instructions */\n") f.write(f"/* Generated {len(all_instructions)} instructions */\n")
f.write(f'#include "{decoder_generated_header_name}"\n\n') f.write(f'#include "{decoder_generated_header_name}"\n\n')
f.write(f'#include "bal_types.h"\n\n')
f.write( f.write(
f"const {DECODER_METADATA_STRUCT_NAME} {DECODER_ARM64_GLOBAL_INSTRUCTIONS_ARRAY_NAME}[{DECODER_ARM64_INSTRUCTIONS_SIZE_NAME}] = {{\n" f"const {DECODER_METADATA_STRUCT_NAME} {DECODER_ARM64_GLOBAL_INSTRUCTIONS_ARRAY_NAME}[{DECODER_ARM64_INSTRUCTIONS_SIZE_NAME}] = {{\n"
) )
for inst in all_instructions: for inst in all_instructions:
ir_opcode: str = derive_opcode(inst.mnemonic)
f.write( f.write(
f' {{ "{inst.mnemonic}", 0x{inst.mask:08X}, 0x{inst.value:08X} }}, \n' f' {{ "{inst.mnemonic}", 0x{inst.mask:08X}, 0x{inst.value:08X}, {ir_opcode} }}, \n'
) )
f.write("};") f.write("};")