From 3f3a906fece93ce6f3e1e5dcfbf028d1dd530a81 Mon Sep 17 00:00:00 2001 From: pancake Date: Sun, 29 Sep 2024 21:09:50 +0200 Subject: [PATCH] Initial assembler support for m68k ##arch --- libr/arch/meson.build | 1 + libr/arch/p/m68k_cs.mk | 1 + libr/arch/p/m68k_cs/m68kass.c | 381 ++++++++++++++++++++++++++++++++++ libr/arch/p/m68k_cs/plugin.c | 16 +- 4 files changed, 398 insertions(+), 1 deletion(-) create mode 100644 libr/arch/p/m68k_cs/m68kass.c diff --git a/libr/arch/meson.build b/libr/arch/meson.build index ac5385a0da..f48587a4d9 100644 --- a/libr/arch/meson.build +++ b/libr/arch/meson.build @@ -45,6 +45,7 @@ r_arch_sources = [ 'p/m68k_gnu/m68k-dis.c', 'p/m68k_gnu/m68k-opc.c', 'p/m68k_cs/plugin.c', +# 'p/m68k_cs/m68kass.c', 'p/m680x_cs/plugin.c', 'p/nds32/plugin.c', 'p/arm/gnu/floatformat.c', diff --git a/libr/arch/p/m68k_cs.mk b/libr/arch/p/m68k_cs.mk index 4a332bed0d..915f73fe51 100644 --- a/libr/arch/p/m68k_cs.mk +++ b/libr/arch/p/m68k_cs.mk @@ -1,4 +1,5 @@ OBJ_M68K_CS=p/m68k_cs/plugin.o +# OBJ_M68K_CS+=p/m68k_cs/m68kass.o include p/capstone.mk diff --git a/libr/arch/p/m68k_cs/m68kass.c b/libr/arch/p/m68k_cs/m68kass.c new file mode 100644 index 0000000000..2485791a08 --- /dev/null +++ b/libr/arch/p/m68k_cs/m68kass.c @@ -0,0 +1,381 @@ +/* radare2 - MIT - Copyright 2024 - pancake */ + +#include + +// Define the assembler function pointer type +typedef int (*AssemblerFunc)(char tokens[][256], int num_tokens, unsigned char* output_buffer, int buffer_length); + +// Instruction struct to hold mnemonic and its assembler function +typedef struct { + const char* mnemonic; + AssemblerFunc assemble; +} Instruction; + +// Function to tokenize a string into tokens +static int tokenize(const char* str, char tokens[][256], int max_tokens) { + int num_tokens = 0; + const char* p = str; + while (*p != '\0' && num_tokens < max_tokens) { + // Skip leading delimiters + while (*p == ' ' || *p == '\t' || *p == ',') p++; + if (*p == '\0') break; + + // Copy the token into tokens[num_tokens] + int len = 0; + while (*p != '\0' && *p != ' ' && *p != '\t' && *p != ',') { + if (len < 255) { + tokens[num_tokens][len++] = *p; + } + p++; + } + tokens[num_tokens][len] = '\0'; + num_tokens++; + } + return num_tokens; +} + +// Function to map register name to register number +static int parse_register(const char* reg_str) { + if (reg_str[0] == 'D' || reg_str[0] == 'd') { + int reg_num = atoi(reg_str + 1); + if (reg_num >= 0 && reg_num <= 7) { + return reg_num; + } + } + return -1; // Invalid register +} + +// Implement assembler functions for each instruction +static int assemble_nop(char tokens[][256], int num_tokens, unsigned char* output_buffer, int buffer_length) { + if (buffer_length < 2) { + return 0; // Cannot assemble, buffer too small + } + + // NOP opcode is 0x4E71 + output_buffer[0] = 0x4E; + output_buffer[1] = 0x71; + return 2; +} + +static int assemble_moveq(char tokens[][256], int num_tokens, unsigned char* output_buffer, int buffer_length) { + if (buffer_length < 2 || num_tokens != 3) { + return 0; + } + + // tokens[0] = "MOVEQ" + // tokens[1] = "#" + // tokens[2] = "D" + + if (tokens[1][0] != '#') { + return 0; // Immediate value expected + } + + int immediate_value = atoi(tokens[1] + 1); // Skip the '#' + if (immediate_value < -128 || immediate_value > 127) { + return 0; // Immediate value out of range + } + + int register_number = parse_register(tokens[2]); + if (register_number < 0) { + return 0; // Invalid register + } + + // Opcode: 0x7000 | (Dn << 9) | (immediate_value & 0xFF) + unsigned short opcode = 0x7000; + opcode |= (register_number << 9); + opcode |= (immediate_value & 0xFF); + + output_buffer[0] = (opcode >> 8) & 0xFF; + output_buffer[1] = opcode & 0xFF; + + return 2; +} + +static int assemble_addq(char tokens[][256], int num_tokens, unsigned char* output_buffer, int buffer_length) { + if (buffer_length < 2 || num_tokens !=3) return 0; + + // tokens[0] = "ADDQ" + // tokens[1] = "#" + // tokens[2] = "D" + + if (tokens[1][0] != '#') return 0; + + int immediate_value = atoi(tokens[1]+1); + if (immediate_value < 1 || immediate_value > 8) { + return 0; + } + if (immediate_value == 8) immediate_value = 0; + + int register_number = parse_register(tokens[2]); + if (register_number < 0) { + return 0; // Invalid register + } + + unsigned short opcode = 0x5000; + opcode |= (immediate_value & 0x7) << 9; + opcode |= (1 <<6); // Size code: word size (01) + opcode |= (0 << 3); // Mode code for data register direct + opcode |= register_number & 0x7; + + output_buffer[0] = (opcode>>8) & 0xFF; + output_buffer[1] = opcode & 0xFF; + return 2; +} + +static int assemble_subq(char tokens[][256], int num_tokens, unsigned char* output_buffer, int buffer_length) { + if (buffer_length < 2 || num_tokens !=3) return 0; + + // tokens[0] = "SUBQ" + // tokens[1] = "#" + // tokens[2] = "D" + + if (tokens[1][0] != '#') return 0; + + int immediate_value = atoi(tokens[1]+1); + if (immediate_value < 1 || immediate_value > 8) { + return 0; + } + if (immediate_value ==8) immediate_value = 0; + + int register_number = parse_register(tokens[2]); + if (register_number < 0) { + return 0; // Invalid register + } + + unsigned short opcode = 0x5100; + opcode |= (immediate_value & 0x7) << 9; + opcode |= (1 <<6); // Size code: word size (01) + opcode |= (0 << 3); // Mode code for data register direct + opcode |= register_number & 0x7; + + output_buffer[0] = (opcode>>8) & 0xFF; + output_buffer[1] = opcode & 0xFF; + return 2; +} + +static int assemble_bne(char tokens[][256], int num_tokens, unsigned char* output_buffer, int buffer_length) { + if (num_tokens !=2) return 0; + + // tokens[0] = "BNE" + // tokens[1] = "" + + int offset = atoi(tokens[1]); + + if (offset >= -128 && offset <= 127) { + if (buffer_length <2) return 0; + unsigned short opcode = 0x6600 | (offset & 0xFF); + output_buffer[0] = (opcode>>8) & 0xFF; + output_buffer[1] = opcode & 0xFF; + return 2; + } else if (offset >= -32768 && offset <= 32767) { + if (buffer_length <4) return 0; + unsigned short opcode = 0x6600; + output_buffer[0] = (opcode>>8) & 0xFF; + output_buffer[1] = opcode & 0xFF; + unsigned short displacement = offset & 0xFFFF; + output_buffer[2] = (displacement >>8) & 0xFF; + output_buffer[3] = displacement & 0xFF; + return 4; + } else { + return 0; // Offset out of range + } +} + +static int assemble_eor(char tokens[][256], int num_tokens, unsigned char* output_buffer, int buffer_length) { + if (buffer_length <2 || num_tokens !=3) return 0; + + // tokens[0] = "EOR" + // tokens[1] = "D" + // tokens[2] = "D" + + int src_reg = parse_register(tokens[1]); + int dest_reg = parse_register(tokens[2]); + + if (src_reg <0 || dest_reg <0) return 0; + + unsigned short opcode = 0xB100; + opcode |= (src_reg << 9); + opcode |= (1 <<6); // Size code: word size + opcode |= (0 <<3); // dest_mode = 0 (data register direct) + opcode |= dest_reg & 0x7; + + output_buffer[0] = (opcode>>8) & 0xFF; + output_buffer[1] = opcode & 0xFF; + return 2; +} + +static int assemble_clr(char tokens[][256], int num_tokens, unsigned char* output_buffer, int buffer_length) { + if (buffer_length <2 || num_tokens !=2) return 0; + + // tokens[0] = "CLR" + // tokens[1] = "D" + + int dest_reg = parse_register(tokens[1]); + if (dest_reg < 0) return 0; + + unsigned short opcode = 0x4200; + opcode |= (1 <<6); // Size code: word size + opcode |= (0 <<3); // dest_mode = 0 (data register direct) + opcode |= dest_reg & 0x7; + + output_buffer[0] = (opcode>>8) & 0xFF; + output_buffer[1] = opcode & 0xFF; + return 2; +} + +static int assemble_bchg(char tokens[][256], int num_tokens, unsigned char* output_buffer, int buffer_length) { + if (buffer_length <2 || num_tokens !=3) return 0; + + // tokens[0] = "BCHG" + // tokens[1] = "D" + // tokens[2] = "D" + + int src_reg = parse_register(tokens[1]); + int dest_reg = parse_register(tokens[2]); + + if (src_reg < 0 || dest_reg < 0) return 0; + + unsigned short opcode = 0x0140; + opcode |= (src_reg <<9); + opcode |= (0 <<3); // dest_mode = 0 (data register direct) + opcode |= dest_reg & 0x7; + + output_buffer[0] = (opcode>>8) & 0xFF; + output_buffer[1] = opcode & 0xFF; + return 2; +} + +static int assemble_and(char tokens[][256], int num_tokens, unsigned char* output_buffer, int buffer_length) { + if (buffer_length <2 || num_tokens !=3) return 0; + + // tokens[0] = "AND" + // tokens[1] = "D" + // tokens[2] = "D" + + int src_reg = parse_register(tokens[1]); + int dest_reg = parse_register(tokens[2]); + + if (src_reg < 0 || dest_reg < 0) return 0; + + unsigned short opcode = 0xC000; + opcode |= (src_reg <<9); + opcode |= (1 <<6); // Size code: word size + opcode |= (0 <<3); // dest_mode = 0 (data register direct) + opcode |= dest_reg & 0x7; + + output_buffer[0] = (opcode>>8) & 0xFF; + output_buffer[1] = opcode & 0xFF; + return 2; +} + +static int assemble_or(char tokens[][256], int num_tokens, unsigned char* output_buffer, int buffer_length) { + if (buffer_length <2 || num_tokens !=3) return 0; + + // tokens[0] = "OR" + // tokens[1] = "D" + // tokens[2] = "D" + + int src_reg = parse_register(tokens[1]); + int dest_reg = parse_register(tokens[2]); + + if (src_reg < 0 || dest_reg < 0) return 0; + + unsigned short opcode = 0x8000; + opcode |= (src_reg <<9); + opcode |= (1 <<6); // Size code: word size + opcode |= (0 <<3); // dest_mode = 0 (data register direct) + opcode |= dest_reg & 0x7; + + output_buffer[0] = (opcode>>8) & 0xFF; + output_buffer[1] = opcode & 0xFF; + return 2; +} + +static int assemble_btst(char tokens[][256], int num_tokens, unsigned char* output_buffer, int buffer_length) { + if (buffer_length <2 || num_tokens !=3) return 0; + + // tokens[0] = "BTST" + // tokens[1] = "D" + // tokens[2] = "D" + + int src_reg = parse_register(tokens[1]); + int dest_reg = parse_register(tokens[2]); + + if (src_reg < 0 || dest_reg < 0) return 0; + + unsigned short opcode = 0x0100; + opcode |= (src_reg <<9); + opcode |= (0 <<3); // dest_mode = 0 + opcode |= dest_reg & 0x7; + + output_buffer[0] = (opcode>>8) & 0xFF; + output_buffer[1] = opcode & 0xFF; + return 2; +} + + +// Global array of instructions +Instruction instruction_set[] = { + {"NOP", assemble_nop}, + {"MOVEQ", assemble_moveq}, + {"ADDQ", assemble_addq}, + {"SUBQ", assemble_subq}, + {"BNE", assemble_bne}, + {"EOR", assemble_eor}, + {"CLR", assemble_clr}, + {"BCHG", assemble_bchg}, + {"AND", assemble_and}, + {"OR", assemble_or}, + {"BTST", assemble_btst}, + {NULL, NULL} // Sentinel value to mark the end of the array +}; + +// Main assemble_instruction function +R_IPI int m68kass(const char* instruction_str, unsigned char* output_buffer, int buffer_length) { + // Convert instruction_str to uppercase + char instr_upper[256]; + int i = 0; + while (instruction_str[i] != '\0' && i < sizeof(instr_upper) - 1) { + instr_upper[i] = toupper((unsigned char)instruction_str[i]); + i++; + } + instr_upper[i] = '\0'; + + // TODO use r_str_split_list() + // Tokenize the instruction string + char tokens[10][256]; // Up to 10 tokens + int num_tokens = tokenize (instr_upper, tokens, 10); + if (num_tokens == 0) { + return 0; // Cannot parse instruction + } + + // Find the instruction in the instruction_set array + for (i = 0; instruction_set[i].mnemonic != NULL; i++) { + if (!strcmp (tokens[0], instruction_set[i].mnemonic)) { + // Found the instruction, call its assembler function + return instruction_set[i].assemble(tokens, num_tokens, output_buffer, buffer_length); + } + } + + // Instruction not found + return 0; +} +#if 0 +int main(int argc, char **argv) { + unsigned char out[4]; + if (argc < 2) { + printf ("usage %s [m68kop]\n", argv[0]); + return 1; + } + const char *text = argv[1]; + int len = assemble_instruction (text, out, sizeof (out)); + if (len > 0) { + int i; + for (i = 0; i< len; i++) { + printf ("%02x", out[i]); + } + printf ("\n"); + } + return 0; +} +#endif diff --git a/libr/arch/p/m68k_cs/plugin.c b/libr/arch/p/m68k_cs/plugin.c index 0a5a4639c6..47dcaf2419 100644 --- a/libr/arch/p/m68k_cs/plugin.c +++ b/libr/arch/p/m68k_cs/plugin.c @@ -1,7 +1,8 @@ -/* radare2 - LGPL - Copyright 2015-2022 - pancake */ +/* radare2 - LGPL - Copyright 2015-2024 - pancake */ #include #include +#include "m68kass.c" #ifdef CAPSTONE_M68K_H #define CAPSTONE_HAS_M68K 1 @@ -910,6 +911,18 @@ static bool fini(RArchSession *s) { return true; } +static bool encode(RArchSession *s, RAnalOp *op, ut32 mask) { + R_RETURN_VAL_IF_FAIL (s->data, false); + // PluginData *pd = s->data; + ut8 data[32] = {0}; + const int len = m68kass (op->mnemonic, data, sizeof (data)); + if (len > 0) { + r_anal_op_set_bytes (op, op->addr, data, len); + return true; + } + return false; +} + const RArchPlugin r_arch_plugin_m68k_cs = { .meta = { .name = "m68k", @@ -922,6 +935,7 @@ const RArchPlugin r_arch_plugin_m68k_cs = { .regs = regs, .bits = R_SYS_BITS_PACK1 (32), .decode = decode, + .encode = encode, .mnemonics = mnemonics, .init = init, .fini = fini,