diff --git a/Makefile b/Makefile index 734b71e..6d47bf6 100644 --- a/Makefile +++ b/Makefile @@ -21,19 +21,21 @@ endif ifeq ($(firstword $(filter x86_64,$(UNAME))),x86_64) - + HAVE_DYNAREC := 1 + CPU_ARCH := x86_32 else ifeq ($(firstword $(filter amd64,$(UNAME))),amd64) - + HAVE_DYNAREC := 1 + CPU_ARCH := x86_32 else ifeq ($(firstword $(filter x86,$(UNAME))),x86) FORCE_32BIT_ARCH = 1 + HAVE_DYNAREC := 1 + CPU_ARCH := x86_32 endif FORCE_32BIT := ifeq ($(FORCE_32BIT_ARCH),1) - HAVE_DYNAREC := 1 FORCE_32BIT := -m32 - CPU_ARCH := x86_32 endif # system platform diff --git a/cpu_threaded.c b/cpu_threaded.c index 0f500dc..0d3a989 100644 --- a/cpu_threaded.c +++ b/cpu_threaded.c @@ -2937,7 +2937,7 @@ block_data_type block_data[MAX_BLOCK_SIZE]; block_exit_type block_exits[MAX_EXITS]; #define smc_write_arm_yes() { \ - int offset = (pc < 0x03000000) ? 0x40000 : -0x8000; \ + intptr_t offset = (pc < 0x03000000) ? 0x40000 : -0x8000; \ if(address32(pc_address_block, (block_end_pc & 0x7FFF) + offset) == 0) \ { \ address32(pc_address_block, (block_end_pc & 0x7FFF) + offset) = \ @@ -2946,7 +2946,7 @@ block_exit_type block_exits[MAX_EXITS]; } #define smc_write_thumb_yes() { \ - int offset = (pc < 0x03000000) ? 0x40000 : -0x8000; \ + intptr_t offset = (pc < 0x03000000) ? 0x40000 : -0x8000; \ if(address16(pc_address_block, (block_end_pc & 0x7FFF) + offset) == 0) \ { \ address16(pc_address_block, (block_end_pc & 0x7FFF) + offset) = \ diff --git a/memmap.c b/memmap.c index 84b86b4..63448da 100644 --- a/memmap.c +++ b/memmap.c @@ -1,6 +1,25 @@ +#include <stdint.h> + #include "memmap.h" +// The JIT cache buffer is allocated via mmap (or the Windows equivalent) so that it +// can be RWX. On top of that, we need the buffer to be "close" to the text +// segment, so that we can perform jumps between the two code blocks. +// Android and some other platforms discourage the use of sections in the +// binary (i.e. the on-disk ELF) that are marked as executable and writable, +// for security reasons. Therefore we prefer to use mmap even though it can be +// tricky to map correctly. + +// To map a block close to the code, we take the function address as a proxy +// for the text section address and try to map the cache next to it. This is +// an iterative trial-and-error process that will hopefully succeed. + +// x86-64 has a +/-2GB offset requirement. +// ARM64 has a +/-128MB offset requirement. +// ARM32 has a +/-32MB offset requirement (gpsp does not require this). +// MIPS requires blocks to be within the same 256MB boundary (identical 4 MSBs). + #ifdef MMAP_JIT_CACHE #ifdef WIN32 @@ -9,7 +28,21 @@ #include <windows.h> void *map_jit_block(unsigned size) { - return VirtualAlloc(0, size, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE); + unsigned i; + uintptr_t base = (uintptr_t)(map_jit_block) & (~0xFFFFFULL); + for (i = 0; i < 256; i++) { + int offset = ((i & 1) ? 1 : -1) * (i >> 1) * 1024 * 1024; + uintptr_t baddr = base + (intptr_t)offset; + if (!baddr) + continue; // Do not map NULL, bad things happen :) + + void *p = VirtualAlloc((void*)baddr, size, MEM_COMMIT|MEM_RESERVE, PAGE_EXECUTE_READWRITE); + if (p == (void*)baddr) + return p; + if (p) + VirtualFree(p, 0, MEM_RELEASE); + } + return 0; } void unmap_jit_block(void *bufptr, unsigned size) { @@ -22,7 +55,22 @@ // Posix implementation void *map_jit_block(unsigned size) { - return mmap(0, size, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANON | MAP_PRIVATE, -1, 0); + unsigned i; + uintptr_t base = (uintptr_t)(map_jit_block) & (~0xFFFFFULL); + for (i = 0; i < 256; i++) { + int offset = ((i & 1) ? 
1 : -1) * (i >> 1) * 1024 * 1024; + uintptr_t baddr = base + (intptr_t)offset; + if (!baddr) + continue; // Do not map NULL, bad things happen :) + + void *p = mmap((void*)baddr, size, PROT_READ|PROT_WRITE|PROT_EXEC, + MAP_ANON|MAP_PRIVATE, -1, 0); + if (p == (void*)baddr) + return p; + if (p) + munmap(p, size); + } + return 0; } void unmap_jit_block(void *bufptr, unsigned size) { diff --git a/x86/x86_emit.h b/x86/x86_emit.h index b8b8532..88394d7 100644 --- a/x86/x86_emit.h +++ b/x86/x86_emit.h @@ -180,7 +180,7 @@ typedef enum } x86_condition_codes; #define x86_relative_offset(source, offset, next) \ - ((u32)offset - ((u32)source + next)) \ + ((u32)((uintptr_t)offset - ((uintptr_t)source + next))) \ #define x86_unequal_operands(op_a, op_b) \ (x86_reg_number_##op_a != x86_reg_number_##op_b) \ @@ -404,6 +404,17 @@ typedef enum #define reg_t0 esi #define reg_rv eax +#if defined(_WIN64) + #define reg_arg0 ecx + #define reg_arg1 edx +#elif defined(__x86_64__) || defined(__amd64__) + #define reg_arg0 edi + #define reg_arg1 esi +#else + #define reg_arg0 eax + #define reg_arg1 edx +#endif + /* Offsets from reg_base, see stub.S */ #define SPSR_BASE_OFF 0xA9100 @@ -588,11 +599,11 @@ typedef enum #define generate_indirect_branch_cycle_update(type) \ generate_cycle_update(); \ - x86_emit_jmp_offset(x86_relative_offset(translation_ptr, \ + x86_emit_call_offset(x86_relative_offset(translation_ptr, \ x86_indirect_branch_##type, 4)) \ #define generate_indirect_branch_no_cycle_update(type) \ - x86_emit_jmp_offset(x86_relative_offset(translation_ptr, \ + x86_emit_call_offset(x86_relative_offset(translation_ptr, \ x86_indirect_branch_##type, 4)) \ #define block_prologue_size 0 @@ -663,8 +674,8 @@ typedef enum } #define emit_trace_instruction(pc, mode) \ - x86_emit_mov_reg_imm(reg_a0, pc); \ - x86_emit_mov_reg_imm(reg_a1, mode); \ + x86_emit_mov_reg_imm(reg_arg0, pc); \ + x86_emit_mov_reg_imm(reg_arg1, mode); \ generate_function_call(trace_instruction); #define emit_trace_arm_instruction(pc) \ emit_trace_instruction(pc, 1) @@ -1062,9 +1073,8 @@ u32 function_cc execute_spsr_restore(u32 address) generate_store_reg(ireg, reg_index); \ if(reg_index == 15) \ { \ - generate_mov(a0, ireg); \ + generate_mov(arg0, ireg); \ generate_function_call(execute_spsr_restore); \ - generate_mov(a0, rv); \ generate_indirect_branch_dual(); \ } \ @@ -1355,18 +1365,17 @@ u32 function_cc execute_spsr_restore(u32 address) // store_mask and address are stored in the SAVE slots, since there's no real // register space to nicely pass them. 
-u32 function_cc execute_store_cpsr_body(u32 _cpsr) +u32 execute_store_cpsr_body() { - reg[REG_CPSR] = _cpsr; if(reg[REG_SAVE] & 0xFF) { - set_cpu_mode(cpu_modes[_cpsr & 0x1F]); + set_cpu_mode(cpu_modes[reg[REG_CPSR] & 0x1F]); if((io_registers[REG_IE] & io_registers[REG_IF]) && - io_registers[REG_IME] && ((_cpsr & 0x80) == 0)) + io_registers[REG_IME] && ((reg[REG_CPSR] & 0x80) == 0)) { reg_mode[MODE_IRQ][6] = reg[REG_SAVE2] + 4; - spsr[MODE_IRQ] = _cpsr; - reg[REG_CPSR] = (_cpsr & 0xFFFFFF00) | 0xD2; + spsr[MODE_IRQ] = reg[REG_CPSR]; + reg[REG_CPSR] = (reg[REG_CPSR] & 0xFFFFFF00) | 0xD2; set_cpu_mode(MODE_IRQ); return 0x00000018; } @@ -1518,7 +1527,6 @@ u32 function_cc execute_store_cpsr_body(u32 _cpsr) #define arm_block_memory_adjust_pc_load() \ if(reg_list & 0x8000) \ { \ - generate_mov(a0, rv); \ generate_indirect_branch_arm(); \ } \ @@ -1865,7 +1873,6 @@ u32 function_cc execute_store_cpsr_body(u32 _cpsr) generate_load_pc(a1, pc); \ generate_function_call(execute_load_u32); \ generate_store_reg(rv, REG_PC); \ - generate_mov(a0, rv); \ generate_indirect_branch_cycle_update(thumb) \ #define thumb_block_memory_extra_push_lr(base_reg) \ @@ -2138,7 +2145,7 @@ static void function_cc execute_swi(u32 pc) #define arm_swi() \ collapse_flags(a0, a1); \ - generate_load_pc(a0, (pc + 4)); \ + generate_load_pc(arg0, (pc + 4)); \ generate_function_call(execute_swi); \ generate_branch() \ @@ -2182,7 +2189,7 @@ static void function_cc execute_swi(u32 pc) #define thumb_swi() \ collapse_flags(a0, a1); \ - generate_load_pc(a0, (pc + 2)); \ + generate_load_pc(arg0, (pc + 2)); \ generate_function_call(execute_swi); \ generate_branch_cycle_update( \ block_exits[block_exit_position].branch_source, \ @@ -2233,8 +2240,8 @@ static void function_cc execute_swi(u32 pc) generate_load_pc(a0, pc); \ generate_indirect_branch_no_cycle_update(type) \ -extern u32 x86_table_data[9][16]; -extern u32 x86_table_info[9][16]; +extern void* x86_table_data[9][16]; +extern void* x86_table_info[9][16]; void init_emitter(void) { memcpy(x86_table_info, x86_table_data, sizeof(x86_table_data)); diff --git a/x86/x86_stub.S b/x86/x86_stub.S index 8b6741c..8681510 100644 --- a/x86/x86_stub.S +++ b/x86/x86_stub.S @@ -26,28 +26,56 @@ symbol: \ _##symbol: -#ifndef _WIN32 -# External symbols (data + functions) -#define _update_gba update_gba -#define _block_lookup_address_arm block_lookup_address_arm -#define _block_lookup_address_thumb block_lookup_address_thumb -#define _block_lookup_address_dual block_lookup_address_dual -#define _write_io_register8 write_io_register8 -#define _write_io_register16 write_io_register16 -#define _write_io_register32 write_io_register32 -#define _flush_translation_cache_ram flush_translation_cache_ram -#define _write_eeprom write_eeprom -#define _write_backup write_backup -#define _write_rtc write_rtc -#define _read_memory8 read_memory8 -#define _read_memory8s read_memory8s -#define _read_memory16 read_memory16 -#define _read_memory16s read_memory16s -#define _read_memory32 read_memory32 -#define _execute_store_cpsr_body execute_store_cpsr_body +// The Windows 32-bit ABI prefixes function names with an underscore +#if defined(_WIN32) && !defined(_WIN64) + #define fnm(name) _##name +#else + #define fnm(name) name #endif -.extern _spsr +// Calling conventions and register allocations differ across platforms, which makes this tricky. +// All functions in this file are called manually from the JIT arena (unless stated +// otherwise), where we use our own calling convention. However, calls to C code must +// follow the platform's calling convention. 
x86 is built with regparm=2 to avoid stack usage. +#if defined(__x86_64__) || defined(__amd64__) + #define ADDR_TYPE .quad + #define ADDR_SIZE_BYTES 8 + #define STACK_REG %rsp + #define FULLREG(rn) %r##rn + #define SAVE_REGISTERS push %rbx; push %rsi; push %rdi; push %rbp + #define REST_REGISTERS pop %rbp; pop %rdi; pop %rsi; pop %rbx + #define REG_BASE %rbx + #ifdef _WIN64 + #define CARG1_REG %ecx // Windows x64 ABI, of course different :D + #define CARG2_REG %edx + #define CARG2_REGPTR %rdx + #define CALL_FUNC(name) \ + sub $32, %rsp; \ + call fnm(name); \ + add $32, %rsp + #else + #define CARG1_REG %edi // SystemV AMD64 ABI + #define CARG2_REG %esi + #define CARG2_REGPTR %rsi + #define CALL_FUNC(name) \ + call fnm(name) + #endif + #define SETUP_ARGS mov %eax, CARG1_REG; mov %edx, CARG2_REG; +#else + #define ADDR_TYPE .long + #define ADDR_SIZE_BYTES 4 + #define STACK_REG %esp + #define FULLREG(rn) %e##rn + #define SAVE_REGISTERS sub $8, %esp; push %ebx; push %esi; push %edi; push %ebp + #define REST_REGISTERS pop %ebp; pop %edi; pop %esi; pop %ebx; add $8, %esp; + #define REG_BASE %ebx + #define CARG1_REG %eax + #define CARG2_REG %edx + #define CARG2_REGPTR %edx + #define SETUP_ARGS + #define CALL_FUNC(name) \ + call fnm(name) +#endif .equ REG_SP, (13 * 4) .equ REG_LR, (14 * 4) @@ -69,15 +97,16 @@ _##symbol: .equ REG_SAVE4, (30 * 4) .equ REG_SAVE5, (31 * 4) -.equ load_u8_tbl, -(9 * 16 * 4) -.equ load_s8_tbl, -(8 * 16 * 4) -.equ load_u16_tbl, -(7 * 16 * 4) -.equ load_s16_tbl, -(6 * 16 * 4) -.equ load_u32_tbl, -(5 * 16 * 4) -.equ store_u8_tbl, -(4 * 16 * 4) -.equ store_u16_tbl, -(3 * 16 * 4) -.equ store_u32_tbl, -(2 * 16 * 4) -.equ store_aligned_u32_tbl, -(1 * 16 * 4) +.equ load_u8_tbl, -(9 * 16 * ADDR_SIZE_BYTES) +.equ load_s8_tbl, -(8 * 16 * ADDR_SIZE_BYTES) +.equ load_u16_tbl, -(7 * 16 * ADDR_SIZE_BYTES) +.equ load_s16_tbl, -(6 * 16 * ADDR_SIZE_BYTES) +.equ load_u32_tbl, -(5 * 16 * ADDR_SIZE_BYTES) +.equ store_u8_tbl, -(4 * 16 * ADDR_SIZE_BYTES) +.equ store_u16_tbl, -(3 * 16 * ADDR_SIZE_BYTES) +.equ store_u32_tbl, -(2 * 16 * ADDR_SIZE_BYTES) +.equ store_aligned_u32_tbl, -(1 * 16 * ADDR_SIZE_BYTES) + .equ PALETTE_RAM_OFF, 0x0100 .equ PALETTE_RAM_CNV_OFF, 0x0500 .equ OAM_RAM_OFF, 0x0900 @@ -93,7 +122,7 @@ _##symbol: # destroys ecx and edx .macro collapse_flag offset, shift - mov \offset(%ebx), %ecx + mov \offset(REG_BASE), %ecx shl $\shift, %ecx or %ecx, %edx .endm @@ -104,7 +133,7 @@ _##symbol: collapse_flag REG_Z_FLAG, 30 collapse_flag REG_C_FLAG, 29 collapse_flag REG_V_FLAG, 28 - mov REG_CPSR(%ebx), %ecx + mov REG_CPSR(REG_BASE), %ecx and $0xFF, %ecx or %ecx, %edx .endm @@ -112,14 +141,14 @@ _##symbol: .macro collapse_flags collapse_flags_no_update - mov %edx, REG_CPSR(%ebx) + mov %edx, REG_CPSR(REG_BASE) .endm .macro extract_flag shift, offset - mov REG_CPSR(%ebx), %edx + mov REG_CPSR(REG_BASE), %edx shr $\shift, %edx and $0x01, %edx - mov %edx, \offset(%ebx) + mov %edx, \offset(REG_BASE) .endm .macro extract_flags @@ -132,25 +161,21 @@ _##symbol: # Process a hardware event. Since an interrupt might be # raised we have to check if the PC has changed. 
-# eax: current address - -st: - .asciz "u\n" - +# arg0 (always in eax): current PC address defsymbl(x86_update_gba) - mov %eax, REG_PC(%ebx) # current PC = eax - collapse_flags # update cpsr, trashes ecx and edx + mov %eax, REG_PC(REG_BASE) # current PC = eax + collapse_flags # update cpsr, trashes ecx and edx - call _update_gba # process the next event + CALL_FUNC(update_gba) # process the next event - mov %eax, REG_CYCLES # new cycle count + mov %eax, REG_CYCLES # new cycle count # did we just complete a frame? go back to main then - cmpl $0, COMPLETED_FRAME(%ebx) + cmpl $0, COMPLETED_FRAME(REG_BASE) jne return_to_main # did the PC change? - cmpl $1, CHANGED_PC_STATUS(%ebx) + cmpl $1, CHANGED_PC_STATUS(REG_BASE) je lookup_pc ret # if not, go back to caller @@ -158,26 +183,33 @@ defsymbl(x86_update_gba) # ARM code, IE anything that changes the PC in ARM mode except # for BX and data processing to PC with the S bit set. -# eax: GBA address to branch to - +# arg0 (always in eax): GBA address to branch to defsymbl(x86_indirect_branch_arm) - call _block_lookup_address_arm - jmp *%eax + mov %eax, CARG1_REG + CALL_FUNC(block_lookup_address_arm) + add $ADDR_SIZE_BYTES, STACK_REG # remove current return addr + jmp *FULLREG(ax) # For indirect branches that'll definitely go to Thumb. In # Thumb mode any indirect branches except for BX. +# arg0 (always in eax): GBA address to branch to defsymbl(x86_indirect_branch_thumb) - call _block_lookup_address_thumb - jmp *%eax + mov %eax, CARG1_REG + CALL_FUNC(block_lookup_address_thumb) + add $ADDR_SIZE_BYTES, STACK_REG # remove current return addr + jmp *FULLREG(ax) # For indirect branches that can go to either Thumb or ARM, # mainly BX (also data processing to PC with S bit set, be # sure to adjust the target with a 1 in the lowest bit for this) +# arg0 (always in eax): GBA address to branch to defsymbl(x86_indirect_branch_dual) - call _block_lookup_address_dual - jmp *%eax + mov %eax, CARG1_REG + CALL_FUNC(block_lookup_address_dual) + add $ADDR_SIZE_BYTES, STACK_REG # remove current return addr + jmp *FULLREG(ax) # General ext memory routines @@ -192,12 +224,16 @@ ext_store_ignore: ext_store_rtc16: and $0xFFFF, %edx # make value 16bit and $0xFF, %eax # mask address - jmp _write_rtc # write out RTC register + SETUP_ARGS # Setup addr, value + CALL_FUNC(write_rtc) # write out RTC register + ret ext_store_backup8: and $0xFF, %edx # make value 8bit and $0xFFFF, %eax # mask address - jmp _write_backup # perform backup write + SETUP_ARGS # Setup addr, value + CALL_FUNC(write_backup) # perform backup write + ret @@ -210,14 +246,14 @@ write_epilogue: je smc_write alert_loop: - call _update_gba # process the next event + CALL_FUNC(update_gba) # process the next event # did we just complete a frame? 
go back to main then - cmpl $0, COMPLETED_FRAME(%ebx) + cmpl $0, COMPLETED_FRAME(REG_BASE) jne return_to_main # see if the halt status has changed - mov CPU_HALT_STATE(%ebx), %edx + mov CPU_HALT_STATE(REG_BASE), %edx cmp $0, %edx # 0 means it has jnz alert_loop # if not go again @@ -229,7 +265,8 @@ no_alert: ret ext_store_eeprom: - jmp _write_eeprom # perform eeprom write + CALL_FUNC(write_eeprom) # perform eeprom write + ret # Register wrapping for various sizes @@ -260,40 +297,41 @@ defsymbl(execute_##fname##_u##wsize) ;\ cmp $15, %ecx ;\ ja ext_store_ignore ;\ /* ecx = ext_store_u*_jtable[address >> 24] */ ;\ - jmp *fname##_u##wsize##_tbl(%ebx, %ecx, 4) ;\ + jmp *fname##_u##wsize##_tbl(REG_BASE, FULLREG(cx), ADDR_SIZE_BYTES) ;\ ;\ ext_##fname##_iwram##wsize: ;\ - and $(0x7FFF & addrm), %eax /* Addr wrap */ ;\ - mov regfn(d), (IWRAM_OFF+0x8000)(%ebx, %eax) /* Actual write */ ;\ - smc_check_##fname(opsuf, IWRAM_OFF(%ebx, %eax)) ;\ + and $(0x7FFF & addrm), %eax /* Addr wrap */ ;\ + mov regfn(d), (IWRAM_OFF+0x8000)(REG_BASE, FULLREG(ax)) /* Actual write */ ;\ + smc_check_##fname(opsuf, IWRAM_OFF(REG_BASE, FULLREG(ax))) ;\ ret ;\ ;\ ext_##fname##_ewram##wsize: ;\ - and $(0x3FFFF & addrm), %eax /* Addr wrap */ ;\ - mov regfn(d), EWRAM_OFF(%ebx, %eax) /* Actual write */ ;\ - smc_check_##fname(opsuf, (EWRAM_OFF+0x40000)(%ebx, %eax)) ;\ + and $(0x3FFFF & addrm), %eax /* Addr wrap */ ;\ + mov regfn(d), EWRAM_OFF(REG_BASE, FULLREG(ax)) /* Actual write */ ;\ + smc_check_##fname(opsuf, (EWRAM_OFF+0x40000)(REG_BASE, FULLREG(ax))) ;\ ret ;\ ;\ ext_##fname##_vram##wsize: ;\ - and $(0x1FFFE & addrm), %eax /* Addr wrap */ ;\ - dup8fn() /* Double byte for 8b access */ ;\ - cmp $0x18000, %eax /* Weird 96KB mirror */ ;\ + and $(0x1FFFE & addrm), %eax /* Addr wrap */ ;\ + dup8fn() /* Double byte for 8b access */ ;\ + cmp $0x18000, %eax /* Weird 96KB mirror */ ;\ jb 1f ;\ - sub $0x8000, %eax /* Mirror last bank */ ;\ + sub $0x8000, %eax /* Mirror last bank */ ;\ 1: ;\ - mov regfn16(d), VRAM_OFF(%ebx, %eax) /* Actual write */ ;\ + mov regfn16(d), VRAM_OFF(REG_BASE, FULLREG(ax)) /* Actual write */ ;\ ret ;\ ;\ ext_##fname##_oam##wsize: ;\ - and $(0x3FE & addrm), %eax /* Addr wrap */ ;\ - movl $1, OAM_UPDATED(%ebx) /* flag OAM update */ ;\ - dup8fn() /* Double byte for 8b access */ ;\ - mov regfn16(d), OAM_RAM_OFF(%ebx, %eax) /* Actual write */ ;\ + and $(0x3FE & addrm), %eax /* Addr wrap */ ;\ + movl $1, OAM_UPDATED(REG_BASE) /* flag OAM update */ ;\ + dup8fn() /* Double byte for 8b access */ ;\ + mov regfn16(d), OAM_RAM_OFF(REG_BASE, FULLREG(ax)) /* Actual write */ ;\ ret ;\ ;\ ext_##fname##_io##wsize: ;\ and $(0x3FF & addrm), %eax /* Addr wrap */ ;\ - call _write_io_register##wsize /* Call C code */ ;\ + SETUP_ARGS ;\ + CALL_FUNC(write_io_register##wsize) /* Call C code */ ;\ jmp write_epilogue /* Might need an update */ ;\ @@ -312,7 +350,7 @@ ext_store_palette8: ext_store_palette16: and $0x3FF, %eax # wrap around address ext_store_palette16b: # entry point for 8bit write - mov %dx, PALETTE_RAM_OFF(%ebx, %eax) # write out palette value + mov %dx, PALETTE_RAM_OFF(REG_BASE, FULLREG(ax)) # write out palette value mov %edx, %ecx # cx = dx shl $11, %ecx # cx <<= 11 (red component is in high bits) mov %dh, %cl # bottom bits of cx = top bits of dx @@ -321,7 +359,7 @@ ext_store_palette16b: # entry point for 8bit write shl $1, %dx # make green component 6bits or %edx, %ecx # combine green component into ecx # write out the freshly converted palette value - mov %cx, PALETTE_RAM_CNV_OFF(%ebx, %eax) + mov %cx, 
PALETTE_RAM_CNV_OFF(REG_BASE, FULLREG(ax)) ret # done ext_store_palette32: @@ -345,20 +383,20 @@ defsymbl(execute_load_##rtype) ;\ and $((1<<(8+albits))-1), %ecx /* preserve align+msb */ ;\ cmp $15, %ecx ;\ ja ext_load_slow##rtype ;\ - jmp *load_##rtype##_tbl(%ebx, %ecx, 4) ;\ + jmp *load_##rtype##_tbl(REG_BASE, FULLREG(cx), ADDR_SIZE_BYTES) ;\ ;\ ext_load_bios##rtype: ;\ - mov %edx, REG_PC(%ebx) /* Store current PC */ ;\ + mov %edx, REG_PC(REG_BASE) /* Store current PC */ ;\ jmp ext_load_slow##rtype ;\ ;\ ext_load_iwram##rtype: ;\ and $(0x7FFF & addrm), %eax /* Addr wrap */ ;\ - movop (IWRAM_OFF+0x8000)(%ebx, %eax), %eax /* Read mem */ ;\ + movop (IWRAM_OFF+0x8000)(REG_BASE, FULLREG(ax)), %eax /* Read mem */ ;\ ret ;\ ;\ ext_load_ewram##rtype: ;\ and $(0x3FFFF & addrm), %eax /* Addr wrap */ ;\ - movop EWRAM_OFF(%ebx, %eax), %eax /* Read mem */ ;\ + movop EWRAM_OFF(REG_BASE, FULLREG(ax)), %eax /* Read mem */ ;\ ret ;\ ;\ ext_load_vram##rtype: ;\ @@ -367,165 +405,155 @@ ext_load_vram##rtype: ;\ jb 1f ;\ sub $0x8000, %eax /* Mirror last bank */ ;\ 1: ;\ - movop VRAM_OFF(%ebx, %eax), %eax /* Read mem */ ;\ + movop VRAM_OFF(REG_BASE, FULLREG(ax)), %eax /* Read mem */ ;\ ret ;\ ;\ ext_load_oam##rtype: ;\ and $(0x3FF & addrm), %eax /* Addr wrap */ ;\ - movop OAM_RAM_OFF(%ebx, %eax), %eax /* Read mem */ ;\ + movop OAM_RAM_OFF(REG_BASE, FULLREG(ax)), %eax /* Read mem */ ;\ ret ;\ ;\ ext_load_palette##rtype: ;\ and $(0x3FF & addrm), %eax /* Addr wrap */ ;\ - movop PALETTE_RAM_OFF(%ebx, %eax), %eax /* Read mem */ ;\ + movop PALETTE_RAM_OFF(REG_BASE, FULLREG(ax)), %eax /* Read mem */ ;\ ret ;\ ;\ ext_load_io##rtype: ;\ and $(0x3FF & addrm), %eax /* Addr wrap */ ;\ - movop IORAM_OFF(%ebx, %eax), %eax /* Read mem */ ;\ + movop IORAM_OFF(REG_BASE, FULLREG(ax)), %eax /* Read mem */ ;\ ret ;\ ;\ ext_load_rom##rtype: ;\ mov %eax, %ecx /* ecx = address */ ;\ shr $15, %ecx /* ecx = address >> 15 */ ;\ - mov RDMAP_OFF(%ebx, %ecx, 4), %edx /* Read rdmap pointer */ ;\ + /* Read rdmap pointer */ ;\ + mov RDMAP_OFF(REG_BASE, FULLREG(cx), ADDR_SIZE_BYTES), FULLREG(dx) ;\ mov %eax, %ecx /* ecx = address */ ;\ and $0x7FFF, %ecx /* ecx = address LSB */ ;\ - movop (%edx, %ecx), %eax /* Read mem */ ;\ + movop (FULLREG(dx), FULLREG(cx)), %eax /* Read mem */ ;\ ret ;\ ;\ ext_load_slow##rtype: ;\ - jmp slowfn ;\ + SETUP_ARGS ;\ + CALL_FUNC(slowfn) ;\ + ret ;\ -load_stubs(u32, mov, ~3, 2, _read_memory32) -load_stubs(u16, movzwl, ~1, 1, _read_memory16) -load_stubs(s16, movswl, ~1, 1, _read_memory16s) -load_stubs( u8, movzbl, ~0, 0, _read_memory8) -load_stubs( s8, movsbl, ~0, 0, _read_memory8s) +load_stubs(u32, mov, ~3, 2, read_memory32) +load_stubs(u16, movzwl, ~1, 1, read_memory16) +load_stubs(s16, movswl, ~1, 1, read_memory16s) +load_stubs( u8, movzbl, ~0, 0, read_memory8) +load_stubs( s8, movsbl, ~0, 0, read_memory8s) -# %eax = new_cpsr -# %edx = store_mask - +# arg0 (%eax) = new_cpsr +# arg1 (%edx) = store_mask defsymbl(execute_store_cpsr) - mov %edx, REG_SAVE(%ebx) # save store_mask + mov %edx, REG_SAVE(REG_BASE) # save store_mask mov %eax, %ecx # ecx = new_cpsr and %edx, %ecx # ecx = new_cpsr & store_mask - mov REG_CPSR(%ebx), %eax # eax = cpsr + mov REG_CPSR(REG_BASE), %eax # eax = cpsr not %edx # edx = ~store_mask and %edx, %eax # eax = cpsr & ~store_mask or %ecx, %eax # eax = new cpsr combined with old + mov %eax, REG_CPSR(REG_BASE) # save new cpsr to register - call _execute_store_cpsr_body # do the dirty work in this C function - + CALL_FUNC(execute_store_cpsr_body) # do the dirty work in this C function 
extract_flags # pull out flag vars from new CPSR cmp $0, %eax # see if return value is 0 - jnz changed_pc_cpsr # might have changed the PC - + jnz 1f # might have changed the PC ret # return +1: # PC has changed due to a triggered IRQ + mov %eax, CARG1_REG # Returned addr from C function + CALL_FUNC(block_lookup_address_arm) # lookup new PC + add $ADDR_SIZE_BYTES, STACK_REG # get rid of current return address + jmp *FULLREG(ax) -changed_pc_cpsr: - add $4, %esp # get rid of current return address - call _block_lookup_address_arm # lookup new PC - jmp *%eax +# On writes that overwrite code, the cache is flushed and execution is restarted smc_write: - call _flush_translation_cache_ram - + CALL_FUNC(flush_translation_cache_ram) lookup_pc: - add $4, %esp # Can't return, discard addr - movl $0, CHANGED_PC_STATUS(%ebx) # Lookup new block and jump to it - mov REG_PC(%ebx), %eax - testl $0x20, REG_CPSR(%ebx) - jz lookup_pc_arm + movl $0, CHANGED_PC_STATUS(REG_BASE) # Lookup new block and jump to it + mov REG_PC(REG_BASE), CARG1_REG # Load PC as argument 0 + testl $0x20, REG_CPSR(REG_BASE) + jz 1f +### Thumb mode + CALL_FUNC(block_lookup_address_thumb) + add $ADDR_SIZE_BYTES, STACK_REG # Can't return, discard addr + jmp *FULLREG(ax) +1: # ARM mode + CALL_FUNC(block_lookup_address_arm) + add $ADDR_SIZE_BYTES, STACK_REG # Can't return, discard addr + jmp *FULLREG(ax) -lookup_pc_thumb: - call _block_lookup_address_thumb - jmp *%eax - -lookup_pc_arm: - call _block_lookup_address_arm - jmp *%eax - -# eax: cycle counter +# Called from C, args are platform dependent :/ +# arg0 (eax/edi/ecx): cycle counter +# arg1 (edx/rsi/rdx): reg base pointer defsymbl(execute_arm_translate_internal) # Save main context, since we need to return gracefully - pushl %ebx - pushl %esi - pushl %edi - pushl %ebp + SAVE_REGISTERS # Pushes 16 or 32 bytes + # The stack here is aligned to 16 bytes minus 4 or 8 bytes. 
- movl %edx, %ebx # load base register (arg1) - extract_flags # load flag variables - movl %eax, REG_CYCLES # load cycle counter (arg0) + mov CARG1_REG, REG_CYCLES # load cycle counter (arg0) + mov CARG2_REGPTR, REG_BASE # load base register (arg1) - movl REG_PC(%ebx), %eax # load PC + extract_flags # load flag variables # (if the CPU is halted, do not start executing but # loop in the alert loop until it wakes up) - cmpl $0, CPU_HALT_STATE(%ebx) + cmpl $0, CPU_HALT_STATE(REG_BASE) je 1f - call alert_loop # Need to push something to the stack - + call alert_loop # Need to push something to the stack + 1: - testl $0x20, REG_CPSR(%ebx) - jnz 2f - - call _block_lookup_address_arm - jmp *%eax # jump to it - -2: - call _block_lookup_address_thumb - jmp *%eax + call lookup_pc # Go fetch and execute PC return_to_main: - add $4, %esp # remove current return addr - popl %ebp - popl %edi - popl %esi - popl %ebx + add $ADDR_SIZE_BYTES, STACK_REG # remove current return addr + REST_REGISTERS # Restore saved registers ret #define load_table(atype) ;\ - .long ext_load_bios##atype /* 0x00 BIOS */;\ - .long ext_load_slow##atype /* 0x01 open read */;\ - .long ext_load_ewram##atype /* 0x02 EWRAM */;\ - .long ext_load_iwram##atype /* 0x03 IWRAM */;\ - .long ext_load_io##atype /* 0x04 I/O registers */;\ - .long ext_load_palette##atype /* 0x05 Palette RAM */;\ - .long ext_load_vram##atype /* 0x06 VRAM */;\ - .long ext_load_oam##atype /* 0x07 OAM RAM */;\ - .long ext_load_rom##atype /* 0x08 gamepak (or RTC) */;\ - .long ext_load_rom##atype /* 0x09 gamepak */;\ - .long ext_load_rom##atype /* 0x0A gamepak */;\ - .long ext_load_rom##atype /* 0x0B gamepak */;\ - .long ext_load_rom##atype /* 0x0C gamepak */;\ - .long ext_load_slow##atype /* 0x0D EEPROM (possibly) */;\ - .long ext_load_slow##atype /* 0x0E Flash ROM/SRAM */;\ - .long ext_load_slow##atype /* 0x0F open read */;\ + ADDR_TYPE ext_load_bios##atype /* 0x00 BIOS */;\ + ADDR_TYPE ext_load_slow##atype /* 0x01 open read */;\ + ADDR_TYPE ext_load_ewram##atype /* 0x02 EWRAM */;\ + ADDR_TYPE ext_load_iwram##atype /* 0x03 IWRAM */;\ + ADDR_TYPE ext_load_io##atype /* 0x04 I/O registers */;\ + ADDR_TYPE ext_load_palette##atype /* 0x05 Palette RAM */;\ + ADDR_TYPE ext_load_vram##atype /* 0x06 VRAM */;\ + ADDR_TYPE ext_load_oam##atype /* 0x07 OAM RAM */;\ + ADDR_TYPE ext_load_rom##atype /* 0x08 gamepak (or RTC) */;\ + ADDR_TYPE ext_load_rom##atype /* 0x09 gamepak */;\ + ADDR_TYPE ext_load_rom##atype /* 0x0A gamepak */;\ + ADDR_TYPE ext_load_rom##atype /* 0x0B gamepak */;\ + ADDR_TYPE ext_load_rom##atype /* 0x0C gamepak */;\ + ADDR_TYPE ext_load_slow##atype /* 0x0D EEPROM (possibly) */;\ + ADDR_TYPE ext_load_slow##atype /* 0x0E Flash ROM/SRAM */;\ + ADDR_TYPE ext_load_slow##atype /* 0x0F open read */;\ #define store_table(asize) ;\ - .long ext_store_ignore /* 0x00 BIOS, ignore */;\ - .long ext_store_ignore /* 0x01 invalid, ignore */;\ - .long ext_store_ewram##asize /* 0x02 EWRAM */;\ - .long ext_store_iwram##asize /* 0x03 IWRAM */;\ - .long ext_store_io##asize /* 0x04 I/O registers */;\ - .long ext_store_palette##asize /* 0x05 Palette RAM */;\ - .long ext_store_vram##asize /* 0x06 VRAM */;\ - .long ext_store_oam##asize /* 0x07 OAM RAM */;\ - .long ext_store_rtc##asize /* 0x08 gamepak (RTC or ignore) */;\ - .long ext_store_ignore /* 0x09 gamepak, ignore */;\ - .long ext_store_ignore /* 0x0A gamepak, ignore */;\ - .long ext_store_ignore /* 0x0B gamepak, ignore */;\ - .long ext_store_ignore /* 0x0C gamepak, ignore */;\ - .long ext_store_eeprom /* 0x0D EEPROM (possibly) 
*/;\ - .long ext_store_backup##asize /* 0x0E Flash ROM/SRAM */;\ - .long ext_store_ignore /* 0x0F ignore */;\ + ADDR_TYPE ext_store_ignore /* 0x00 BIOS, ignore */;\ + ADDR_TYPE ext_store_ignore /* 0x01 invalid, ignore */;\ + ADDR_TYPE ext_store_ewram##asize /* 0x02 EWRAM */;\ + ADDR_TYPE ext_store_iwram##asize /* 0x03 IWRAM */;\ + ADDR_TYPE ext_store_io##asize /* 0x04 I/O registers */;\ + ADDR_TYPE ext_store_palette##asize /* 0x05 Palette RAM */;\ + ADDR_TYPE ext_store_vram##asize /* 0x06 VRAM */;\ + ADDR_TYPE ext_store_oam##asize /* 0x07 OAM RAM */;\ + ADDR_TYPE ext_store_rtc##asize /* 0x08 gamepak (RTC or ignore) */;\ + ADDR_TYPE ext_store_ignore /* 0x09 gamepak, ignore */;\ + ADDR_TYPE ext_store_ignore /* 0x0A gamepak, ignore */;\ + ADDR_TYPE ext_store_ignore /* 0x0B gamepak, ignore */;\ + ADDR_TYPE ext_store_ignore /* 0x0C gamepak, ignore */;\ + ADDR_TYPE ext_store_eeprom /* 0x0D EEPROM (possibly) */;\ + ADDR_TYPE ext_store_backup##asize /* 0x0E Flash ROM/SRAM */;\ + ADDR_TYPE ext_store_ignore /* 0x0F ignore */;\ .data +.align 16 defsymbl(x86_table_data) load_table(u8) @@ -538,29 +566,29 @@ defsymbl(x86_table_data) store_table(32) # aligned word writes (non SMC signaling) - .long ext_store_ignore # 0x00 BIOS, ignore - .long ext_store_ignore # 0x01 invalid, ignore - .long ext_store_aligned_ewram32 # 0x02 EWRAM - .long ext_store_aligned_iwram32 # 0x03 IWRAM - .long ext_store_io32 # 0x04 I/O registers - .long ext_store_palette32 # 0x05 Palette RAM - .long ext_store_vram32 # 0x06 VRAM - .long ext_store_oam32 # 0x07 OAM RAM - .long ext_store_ignore # 0x08 gamepak, ignore (no RTC in 32bit) - .long ext_store_ignore # 0x09 gamepak, ignore - .long ext_store_ignore # 0x0A gamepak, ignore - .long ext_store_ignore # 0x0B gamepak, ignore - .long ext_store_ignore # 0x0C gamepak, ignore - .long ext_store_eeprom # 0x0D EEPROM (possibly) - .long ext_store_ignore # 0x0E Flash ROM/SRAM must be 8bit - .long ext_store_ignore # 0x0F ignore + ADDR_TYPE ext_store_ignore # 0x00 BIOS, ignore + ADDR_TYPE ext_store_ignore # 0x01 invalid, ignore + ADDR_TYPE ext_store_aligned_ewram32 # 0x02 EWRAM + ADDR_TYPE ext_store_aligned_iwram32 # 0x03 IWRAM + ADDR_TYPE ext_store_io32 # 0x04 I/O registers + ADDR_TYPE ext_store_palette32 # 0x05 Palette RAM + ADDR_TYPE ext_store_vram32 # 0x06 VRAM + ADDR_TYPE ext_store_oam32 # 0x07 OAM RAM + ADDR_TYPE ext_store_ignore # 0x08 gamepak, ignore (no RTC in 32bit) + ADDR_TYPE ext_store_ignore # 0x09 gamepak, ignore + ADDR_TYPE ext_store_ignore # 0x0A gamepak, ignore + ADDR_TYPE ext_store_ignore # 0x0B gamepak, ignore + ADDR_TYPE ext_store_ignore # 0x0C gamepak, ignore + ADDR_TYPE ext_store_eeprom # 0x0D EEPROM (possibly) + ADDR_TYPE ext_store_ignore # 0x0E Flash ROM/SRAM must be 8bit + ADDR_TYPE ext_store_ignore # 0x0F ignore .bss .align 64 defsymbl(x86_table_info) - .space 9*4*16 + .space 9*16*ADDR_SIZE_BYTES defsymbl(reg) .space 0x100 defsymbl(palette_ram) @@ -579,11 +607,12 @@ defsymbl(io_registers) .space 0x400 defsymbl(spsr) .space 24 + .space 8 # padding defsymbl(reg_mode) .space 196 - .space 36 # padding + .space 28 # padding defsymbl(memory_map_read) - .space 0x8000 + .space 8*1024*ADDR_SIZE_BYTES #ifndef MMAP_JIT_CACHE #error "x86 dynarec builds *require* MMAP_JIT_CACHE"
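
Note on the memmap.c change: the generated code reaches the stubs in x86_stub.S through rel32 call/jmp encodings, whose displacement is a signed 32-bit value measured from the end of the instruction. That is where the x86-64 +/-2GB requirement comes from, and why map_jit_block() probes addresses around its own location in 1MB steps instead of letting the OS pick one. The snippet below is an illustrative sketch only (it is not part of the patch; within_rel32_reach is a made-up helper name) of the reachability condition the emitter relies on, mirroring the widened x86_relative_offset() macro in x86_emit.h:

#include <stdint.h>

/* Illustrative only, not from the patch: returns non-zero when a 5-byte
   rel32 call/jmp placed at `source` can reach `target`, i.e. the
   displacement fits in a signed 32-bit immediate. map_jit_block() tries
   to keep the whole JIT cache within this range of the text segment. */
static int within_rel32_reach(const void *source, const void *target)
{
  intptr_t disp = (intptr_t)target - ((intptr_t)source + 5);
  return disp >= INT32_MIN && disp <= INT32_MAX;
}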
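
Note on the argument-register macros (reg_arg0/reg_arg1 in x86_emit.h and CARG1_REG/CARG2_REG, SETUP_ARGS, CALL_FUNC in x86_stub.S): JIT-to-stub calls keep using the emulator's private convention (arguments in eax/edx), but calls into C must follow the platform ABI, so the first two integer arguments land in eax/edx on 32-bit builds (regparm=2), in edi/esi for SysV x86-64, and in ecx/edx for Win64, which additionally requires the caller to reserve 32 bytes of shadow space (the sub $32, %rsp in the Win64 CALL_FUNC). A compile-only sketch of the regparm assumption, purely illustrative and not taken from the patch (jit_abi_example is a made-up name):

/* Illustrative only: with GCC/Clang on 32-bit x86, regparm(2) passes the
   first two integer arguments in %eax and %edx, matching what SETUP_ARGS
   and reg_arg0/reg_arg1 assume. The attribute is guarded because it only
   applies to 32-bit x86 targets. */
#if defined(__i386__)
__attribute__((regparm(2)))
#endif
unsigned int jit_abi_example(unsigned int addr, unsigned int value)
{
  return addr + value; /* addr arrives in %eax, value in %edx on x86-32 */
}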