[DYNAREC] Beginning of emitter and dynarec infrastructure (untested yet)

ptitSeb 2021-03-15 15:36:30 +01:00
parent 4919f161cc
commit 21f8b852f2
16 changed files with 2225 additions and 70 deletions


@@ -243,56 +243,56 @@ if(ARM_DYNAREC)
"${BOX64_ROOT}/src/dynarec/arm64_lock_helper.S"
)
-#set(DYNAREC_PASS
-#"${BOX64_ROOT}/src/dynarec/dynarec_arm_helper.c"
-#"${BOX64_ROOT}/src/dynarec/dynarec_arm_emit_tests.c"
-#"${BOX64_ROOT}/src/dynarec/dynarec_arm_emit_math.c"
-#"${BOX64_ROOT}/src/dynarec/dynarec_arm_emit_logic.c"
-#"${BOX64_ROOT}/src/dynarec/dynarec_arm_emit_shift.c"
-#"${BOX64_ROOT}/src/dynarec/dynarec_arm_pass.c"
-#"${BOX64_ROOT}/src/dynarec/dynarec_arm_00.c"
-#"${BOX64_ROOT}/src/dynarec/dynarec_arm_0f.c"
-#"${BOX64_ROOT}/src/dynarec/dynarec_arm_64.c"
-#"${BOX64_ROOT}/src/dynarec/dynarec_arm_65.c"
-#"${BOX64_ROOT}/src/dynarec/dynarec_arm_66.c"
-#"${BOX64_ROOT}/src/dynarec/dynarec_arm_67.c"
-#"${BOX64_ROOT}/src/dynarec/dynarec_arm_d8.c"
-#"${BOX64_ROOT}/src/dynarec/dynarec_arm_d9.c"
-#"${BOX64_ROOT}/src/dynarec/dynarec_arm_da.c"
-#"${BOX64_ROOT}/src/dynarec/dynarec_arm_db.c"
-#"${BOX64_ROOT}/src/dynarec/dynarec_arm_dc.c"
-#"${BOX64_ROOT}/src/dynarec/dynarec_arm_dd.c"
-#"${BOX64_ROOT}/src/dynarec/dynarec_arm_de.c"
-#"${BOX64_ROOT}/src/dynarec/dynarec_arm_df.c"
-#"${BOX64_ROOT}/src/dynarec/dynarec_arm_f0.c"
-#"${BOX64_ROOT}/src/dynarec/dynarec_arm_660f.c"
-#"${BOX64_ROOT}/src/dynarec/dynarec_arm_f20f.c"
-#"${BOX64_ROOT}/src/dynarec/dynarec_arm_f30f.c"
-#)
+set(DYNAREC_PASS
+"${BOX64_ROOT}/src/dynarec/dynarec_arm64_helper.c"
+#"${BOX64_ROOT}/src/dynarec/dynarec_arm64_emit_tests.c"
+#"${BOX64_ROOT}/src/dynarec/dynarec_arm64_emit_math.c"
+#"${BOX64_ROOT}/src/dynarec/dynarec_arm64_emit_logic.c"
+#"${BOX64_ROOT}/src/dynarec/dynarec_arm64_emit_shift.c"
+"${BOX64_ROOT}/src/dynarec/dynarec_arm64_pass.c"
+"${BOX64_ROOT}/src/dynarec/dynarec_arm64_00.c"
+#"${BOX64_ROOT}/src/dynarec/dynarec_arm64_0f.c"
+#"${BOX64_ROOT}/src/dynarec/dynarec_arm64_64.c"
+#"${BOX64_ROOT}/src/dynarec/dynarec_arm64_65.c"
+#"${BOX64_ROOT}/src/dynarec/dynarec_arm64_66.c"
+#"${BOX64_ROOT}/src/dynarec/dynarec_arm64_67.c"
+#"${BOX64_ROOT}/src/dynarec/dynarec_arm64_d8.c"
+#"${BOX64_ROOT}/src/dynarec/dynarec_arm64_d9.c"
+#"${BOX64_ROOT}/src/dynarec/dynarec_arm64_da.c"
+#"${BOX64_ROOT}/src/dynarec/dynarec_arm64_db.c"
+#"${BOX64_ROOT}/src/dynarec/dynarec_arm64_dc.c"
+#"${BOX64_ROOT}/src/dynarec/dynarec_arm64_dd.c"
+#"${BOX64_ROOT}/src/dynarec/dynarec_arm64_de.c"
+#"${BOX64_ROOT}/src/dynarec/dynarec_arm64_df.c"
+#"${BOX64_ROOT}/src/dynarec/dynarec_arm64_f0.c"
+#"${BOX64_ROOT}/src/dynarec/dynarec_arm64_660f.c"
+#"${BOX64_ROOT}/src/dynarec/dynarec_arm64_f20f.c"
+#"${BOX64_ROOT}/src/dynarec/dynarec_arm64_f30f.c"
+)
-#set(DYNAREC_PASS "${BOX64_ROOT}/src/wrapped/generated/wrapper.h" ${DYNAREC_PASS})
+set(DYNAREC_PASS "${BOX64_ROOT}/src/wrapped/generated/wrapper.h" ${DYNAREC_PASS})
add_library(dynarec_arm OBJECT ${DYNAREC_SRC})
-#add_library(arm64_pass0 OBJECT ${DYNAREC_PASS})
-#set_target_properties(arm64_pass0 PROPERTIES COMPILE_FLAGS "-DSTEP=0")
-#add_library(arm64_pass1 OBJECT ${DYNAREC_PASS})
-#set_target_properties(arm64_pass1 PROPERTIES COMPILE_FLAGS "-DSTEP=1")
-#add_library(arm64_pass2 OBJECT ${DYNAREC_PASS})
-#set_target_properties(arm64_pass2 PROPERTIES COMPILE_FLAGS "-DSTEP=2")
-#add_library(arm64_pass3 OBJECT ${DYNAREC_PASS})
-#set_target_properties(arm64_pass3 PROPERTIES COMPILE_FLAGS "-DSTEP=3")
-#add_dependencies(arm64_pass0 WRAPPERS)
-#add_dependencies(arm64_pass1 WRAPPERS)
-#add_dependencies(arm64_pass2 WRAPPERS)
-#add_dependencies(arm64_pass3 WRAPPERS)
+add_library(arm64_pass0 OBJECT ${DYNAREC_PASS})
+set_target_properties(arm64_pass0 PROPERTIES COMPILE_FLAGS "-DSTEP=0")
+add_library(arm64_pass1 OBJECT ${DYNAREC_PASS})
+set_target_properties(arm64_pass1 PROPERTIES COMPILE_FLAGS "-DSTEP=1")
+add_library(arm64_pass2 OBJECT ${DYNAREC_PASS})
+set_target_properties(arm64_pass2 PROPERTIES COMPILE_FLAGS "-DSTEP=2")
+add_library(arm64_pass3 OBJECT ${DYNAREC_PASS})
+set_target_properties(arm64_pass3 PROPERTIES COMPILE_FLAGS "-DSTEP=3")
+add_dependencies(arm64_pass0 WRAPPERS)
+add_dependencies(arm64_pass1 WRAPPERS)
+add_dependencies(arm64_pass2 WRAPPERS)
+add_dependencies(arm64_pass3 WRAPPERS)
add_library(dynarec STATIC
$<TARGET_OBJECTS:dynarec_arm>
-#$<TARGET_OBJECTS:arm64_pass0>
-#$<TARGET_OBJECTS:arm64_pass1>
-#$<TARGET_OBJECTS:arm64_pass2>
-#$<TARGET_OBJECTS:arm64_pass3>
+$<TARGET_OBJECTS:arm64_pass0>
+$<TARGET_OBJECTS:arm64_pass1>
+$<TARGET_OBJECTS:arm64_pass2>
+$<TARGET_OBJECTS:arm64_pass3>
)
endif()

src/dynarec/arm64_emitter.h (new executable file, 282 lines)

@@ -0,0 +1,282 @@
#ifndef __ARM64_EMITTER_H__
#define __ARM64_EMITTER_H__
/*
ARM64 Emitter
*/
// x86 Register mapping
#define xRAX 10
#define xRCX 11
#define xRDX 12
#define xRBX 13
#define xRSP 14
#define xRBP 15
#define xRSI 16
#define xRDI 17
#define xR8 18
#define xR9 19
#define xR10 20
#define xR11 21
#define xR12 22
#define xR13 23
#define xR14 24
#define xR15 25
#define xFlags 26
#define xRIP 27
// 32bits version
#define wEAX xRAX
#define wECX xRCX
#define wEDX xRDX
#define wEBX xRBX
#define wESP xRSP
#define wEBP xRBP
#define wESI xRSI
#define wEDI xRDI
#define wR8 xR8
#define wR9 xR9
#define wR10 xR10
#define wR11 xR11
#define wR12 xR12
#define wR13 xR13
#define wR14 xR14
#define wR15 xR15
#define wFlags xFlags
// scratch registers
#define x1 1
#define x2 2
#define x3 3
#define x4 4
#define x5 5
#define x6 6
#define x7 7
// 32bits version of scratch
#define w1 x1
#define w2 x2
#define w3 x3
#define w4 x4
#define w5 x5
#define w6 x6
#define w7 x7
// emu is r0
#define xEmu 0
// ARM64 SP is r31 but is a special register
#define xSP 31
// xZR reg is r31
#define xZR 31
#define wZR xZR
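// Note: every macro below assumes an EMIT(opcode) macro, defined by the
// including dynarec pass, that appends one 32-bit opcode to the block
// being built. A minimal sketch of the idea (the field names used here
// are assumptions for illustration, not part of this commit):
//   #define EMIT(A)  do { *(uint32_t*)(dyn->block) = (uint32_t)(A); \
//                         dyn->block += 4; dyn->arm_size += 4; } while(0)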
// MOVZ
#define MOVZ_gen(sf, hw, imm16, Rd) ((sf)<<31 | 0b10<<29 | 0b100101<<23 | (hw)<<21 | (imm16)<<5 | (Rd))
#define MOVZx(Rd, imm16) EMIT(MOVZ_gen(1, 0, (imm16)&0xffff, Rd))
#define MOVZx_LSL(Rd, imm16, shift) EMIT(MOVZ_gen(1, (shift)/16, (imm16)&0xffff, Rd))
#define MOVZw(Rd, imm16) EMIT(MOVZ_gen(0, 0, (imm16)&0xffff, Rd))
#define MOVZw_LSL(Rd, imm16, shift) EMIT(MOVZ_gen(0, (shift)/16, (imm16)&0xffff, Rd))
// MOVN
#define MOVN_gen(sf, hw, imm16, Rd) ((sf)<<31 | 0b00<<29 | 0b100101<<23 | (hw)<<21 | (imm16)<<5 | (Rd))
#define MOVNx(Rd, imm16) EMIT(MOVN_gen(1, 0, (imm16)&0xffff, Rd))
#define MOVNx_LSL(Rd, imm16, shift) EMIT(MOVN_gen(1, (shift)/16, (imm16)&0xffff, Rd))
#define MOVNw(Rd, imm16) EMIT(MOVN_gen(0, 0, (imm16)&0xffff, Rd))
#define MOVNw_LSL(Rd, imm16, shift) EMIT(MOVN_gen(0, (shift)/16, (imm16)&0xffff, Rd))
// MOVK
#define MOVK_gen(sf, hw, imm16, Rd) ((sf)<<31 | 0b11<<29 | 0b100101<<23 | (hw)<<21 | (imm16)<<5 | (Rd))
#define MOVKx(Rd, imm16) EMIT(MOVK_gen(1, 0, (imm16)&0xffff, Rd))
#define MOVKx_LSL(Rd, imm16, shift) EMIT(MOVK_gen(1, (shift)/16, (imm16)&0xffff, Rd))
#define MOVKw(Rd, imm16) EMIT(MOVK_gen(0, 0, (imm16)&0xffff, Rd))
#define MOVKw_LSL(Rd, imm16, shift) EMIT(MOVK_gen(0, (shift)/16, (imm16)&0xffff, Rd))
#define MOV32w(Rd, imm32) {MOVZw(Rd, (imm32)&0xffff); if((imm32)&0xffff0000) {MOVKw_LSL(Rd, ((imm32)>>16)&0xffff, 16);}}
#define MOV64x(Rd, imm64) { \
MOVZx(Rd, ((uint64_t)(imm64))&0xffff); \
if(((uint64_t)(imm64))&0xffff0000) {MOVKx_LSL(Rd, (((uint64_t)(imm64))>>16)&0xffff, 16);} \
if(((uint64_t)(imm64))&0xffff00000000L) {MOVKx_LSL(Rd, (((uint64_t)(imm64))>>32)&0xffff, 32);} \
if(((uint64_t)(imm64))&0xffff000000000000L) {MOVKx_LSL(Rd, (((uint64_t)(imm64))>>48)&0xffff, 48);}\
}
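// Worked example (illustration only): with the EMIT() convention above,
// MOV64x(x1, 0x123456789ABCDEF0) expands to four instructions:
//   movz x1, #0xDEF0
//   movk x1, #0x9ABC, lsl #16
//   movk x1, #0x5678, lsl #32
//   movk x1, #0x1234, lsl #48
// while 16-bit halves that are zero (other than the lowest one) are skipped.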
// ADD / SUB
#define ADDSUB_REG_gen(sf, op, S, shift, Rm, imm6, Rn, Rd) ((sf)<<31 | (op)<<30 | (S)<<29 | 0b01011<<24 | (shift)<<22 | (Rm)<<16 | (imm6)<<10 | (Rn)<<5 | (Rd))
#define ADDx_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(1, 0, 0, 0b00, Rm, 0, Rn, Rd))
#define ADDSx_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(1, 0, 1, 0b00, Rm, 0, Rn, Rd))
#define ADDx_REG_LSL(Rd, Rn, Rm, lsl) EMIT(ADDSUB_REG_gen(1, 0, 0, 0b00, Rm, lsl, Rn, Rd))
#define ADDw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(0, 0, 0, 0b00, Rm, 0, Rn, Rd))
#define ADDSw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(0, 0, 1, 0b00, Rm, 0, Rn, Rd))
#define ADDSUB_IMM_gen(sf, op, S, shift, imm12, Rn, Rd) ((sf)<<31 | (op)<<30 | (S)<<29 | 0b10001<<24 | (shift)<<22 | (imm12)<<10 | (Rn)<<5 | (Rd))
#define ADDx_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(1, 0, 0, 0, (imm12)&0xfff, Rn, Rd))
#define ADDSx_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(1, 0, 1, 0, (imm12)&0xfff, Rn, Rd))
#define ADDw_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(0, 0, 0, 0, (imm12)&0xfff, Rn, Rd))
#define ADDSw_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(0, 0, 1, 0, (imm12)&0xfff, Rn, Rd))
#define SUBx_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(1, 1, 0, 0b00, Rm, 0, Rn, Rd))
#define SUBSx_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(1, 1, 1, 0b00, Rm, 0, Rn, Rd))
#define SUBx_REG_LSL(Rd, Rn, Rm, lsl) EMIT(ADDSUB_REG_gen(1, 1, 0, 0b00, Rm, lsl, Rn, Rd))
#define SUBw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(0, 1, 0, 0b00, Rm, 0, Rn, Rd))
#define SUBSw_REG(Rd, Rn, Rm) EMIT(ADDSUB_REG_gen(0, 1, 1, 0b00, Rm, 0, Rn, Rd))
#define SUBx_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(1, 1, 0, 0, (imm12)&0xfff, Rn, Rd))
#define SUBSx_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(1, 1, 1, 0, (imm12)&0xfff, Rn, Rd))
#define SUBw_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(0, 1, 0, 0, (imm12)&0xfff, Rn, Rd))
#define SUBSw_U12(Rd, Rn, imm12) EMIT(ADDSUB_IMM_gen(0, 1, 1, 0, (imm12)&0xfff, Rn, Rd))
// LDR
#define LDR_gen(size, op1, imm9, op2, Rn, Rt) ((size)<<30 | 0b111<<27 | (op1)<<24 | 0b01<<22 | (imm9)<<12 | (op2)<<10 | 0b01<<10 | (Rn)<<5 | (Rt))
#define LDRx_S9_postindex(Rt, Rn, imm9) EMIT(LDR_gen(0b11, 0b00, (imm9)&0x1ff, 0b01, Rn, Rt))
#define LDRx_S9_preindex(Rt, Rn, imm9) EMIT(LDR_gen(0b11, 0b00, (imm9)&0x1ff, 0b11, Rn, Rt))
#define LDRw_S9_postindex(Rt, Rn, imm9) EMIT(LDR_gen(0b10, 0b00, (imm9)&0x1ff, 0b01, Rn, Rt))
#define LDRw_S9_preindex(Rt, Rn, imm9) EMIT(LDR_gen(0b10, 0b00, (imm9)&0x1ff, 0b11, Rn, Rt))
#define LD_gen(size, op1, imm12, Rn, Rt) ((size)<<30 | 0b111<<27 | (op1)<<24 | 0b01<<22 | (imm12)<<10 | (Rn)<<5 | (Rt))
#define LDRx_U12(Rt, Rn, imm12) EMIT(LD_gen(0b11, 0b01, ((uint32_t)(imm12))&0xfff, Rn, Rt))
#define LDRw_U12(Rt, Rn, imm12) EMIT(LD_gen(0b10, 0b01, ((uint32_t)(imm12))&0xfff, Rn, Rt))
#define LDRB_U12(Rt, Rn, imm12) EMIT(LD_gen(0b00, 0b01, ((uint32_t)(imm12))&0xfff, Rn, Rt))
#define LDRH_U12(Rt, Rn, imm12) EMIT(LD_gen(0b01, 0b01, ((uint32_t)(imm12))&0xfff, Rn, Rt))
#define LDR_REG_gen(size, Rm, option, S, Rn, Rt) ((size)<<30 | 0b111<<27 | 0b01<<22 | 1<<21 | (Rm)<<16 | (option)<<13 | (S)<<12 | (0b10)<<10 | (Rn)<<5 | (Rt))
#define LDRx_REG(Rt, Rn, Rm) EMIT(LDR_REG_gen(0b11, Rm, 0b011, 0, Rn, Rt))
#define LDRx_REG_LSL3(Rt, Rn, Rm) EMIT(LDR_REG_gen(0b11, Rm, 0b011, 1, Rn, Rt))
#define LDRx_REG_UXTW(Rt, Rn, Rm) EMIT(LDR_REG_gen(0b11, Rm, 0b010, 0, Rn, Rt))
#define LDRw_REG(Rt, Rn, Rm) EMIT(LDR_REG_gen(0b10, Rm, 0b011, 0, Rn, Rt))
#define LDRw_REG_LSL2(Rt, Rn, Rm) EMIT(LDR_REG_gen(0b10, Rm, 0b011, 1, Rn, Rt))
#define LDRB_REG(Rt, Rn, Rm) EMIT(LDR_REG_gen(0b00, Rm, 0b011, 0, Rn, Rt))
#define LDRH_REG(Rt, Rn, Rm) EMIT(LDR_REG_gen(0b01, Rm, 0b011, 0, Rn, Rt))
#define LDRSH_gen(size, opc, imm12, Rn, Rt) ((size)<<30 | 0b111<<27 | 0b01<<24 | (opc)<<22 | (imm12)<<10 | (Rn)<<5 | (Rt))
#define LDRSHx_U12(Rt, Rn, imm12) EMIT(LDRSH_gen(0b01, 0b10, ((uint32_t)(imm12))&0xfff, Rn, Rt))
#define LDRSHw_U12(Rt, Rn, imm12) EMIT(LDRSH_gen(0b01, 0b11, ((uint32_t)(imm12))&0xfff, Rn, Rt))
// STR
#define STR_gen(size, op1, imm9, op2, Rn, Rt) ((size)<<30 | 0b111<<27 | (op1)<<24 | 0b00<<22 | (imm9)<<12 | (op2)<<10 | 0b01<<10 | (Rn)<<5 | (Rt))
#define STRx_S9_postindex(Rt, Rn, imm9) EMIT(STR_gen(0b11, 0b00, (imm9)&0x1ff, 0b01, Rn, Rt))
#define STRx_S9_preindex(Rt, Rn, imm9) EMIT(STR_gen(0b11, 0b00, (imm9)&0x1ff, 0b11, Rn, Rt))
#define STRw_S9_postindex(Rt, Rn, imm9) EMIT(STR_gen(0b10, 0b00, (imm9)&0x1ff, 0b01, Rn, Rt))
#define STRw_S9_preindex(Rt, Rn, imm9) EMIT(STR_gen(0b10, 0b00, (imm9)&0x1ff, 0b11, Rn, Rt))
#define ST_gen(size, op1, imm12, Rn, Rt) ((size)<<30 | 0b111<<27 | (op1)<<24 | 0b00<<22 | (imm12)<<10 | (Rn)<<5 | (Rt))
#define STRx_U12(Rt, Rn, imm12) EMIT(ST_gen(0b11, 0b01, ((uint32_t)(imm12))&0xfff, Rn, Rt))
#define STRw_U12(Rt, Rn, imm12) EMIT(ST_gen(0b10, 0b01, ((uint32_t)(imm12))&0xfff, Rn, Rt))
#define STRB_U12(Rt, Rn, imm12) EMIT(ST_gen(0b00, 0b01, ((uint32_t)(imm12))&0xfff, Rn, Rt))
#define STRH_U12(Rt, Rn, imm12) EMIT(ST_gen(0b01, 0b01, ((uint32_t)(imm12))&0xfff, Rn, Rt))
#define STR_REG_gen(size, Rm, option, S, Rn, Rt) ((size)<<30 | 0b111<<27 | 0b00<<22 | 1<<21 | (Rm)<<16 | (option)<<13 | (S)<<12 | (0b10)<<10 | (Rn)<<5 | (Rt))
#define STRx_REG(Rt, Rn, Rm) EMIT(STR_REG_gen(0b11, Rm, 0b011, 0, Rn, Rt))
#define STRx_REG_LSL3(Rt, Rn, Rm) EMIT(STR_REG_gen(0b11, Rm, 0b011, 1, Rn, Rt))
#define STRx_REG_UXTW(Rt, Rn, Rm) EMIT(STR_REG_gen(0b11, Rm, 0b010, 0, Rn, Rt))
#define STRw_REG(Rt, Rn, Rm) EMIT(STR_REG_gen(0b10, Rm, 0b011, 0, Rn, Rt))
#define STRw_REG_LSL2(Rt, Rn, Rm) EMIT(STR_REG_gen(0b10, Rm, 0b011, 1, Rn, Rt))
#define STRB_REG(Rt, Rn, Rm) EMIT(STR_REG_gen(0b00, Rm, 0b011, 0, Rn, Rt))
#define STRH_REG(Rt, Rn, Rm) EMIT(STR_REG_gen(0b01, Rm, 0b011, 0, Rn, Rt))
// PUSH / POP helper
#define POP1(reg) LDRx_S9_postindex(reg, xRSP, 8)
#define PUSH1(reg) STRx_S9_preindex(reg, xRSP, -8)
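// e.g. with the register mapping above (xRAX=r10, xRSP=r14), PUSH1(xRAX)
// and POP1(xRAX) emulate the x86 push/pop of RAX as:
//   str x10, [x14, #-8]!   // PUSH1: pre-decrement emulated RSP, then store
//   ldr x10, [x14], #8     // POP1: load, then post-increment emulated RSP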
// BR and branchs
#define BR_gen(Z, op, A, M, Rn, Rm) (0b1101011<<25 | (Z)<<24 | (op)<<21 | 0b11111<<16 | (A)<<11 | (M)<<10 | (Rn)<<5 | (Rm))
#define BR(Rn) EMIT(BR_gen(0, 0b00, 0, 0, Rn, 0))
#define BLR(Rn) EMIT(BR_gen(0, 0b01, 0, 0, Rn, 0))
#define CB_gen(sf, op, imm19, Rt) ((sf)<<31 | 0b011010<<25 | (op)<<24 | (imm19)<<5 | (Rt))
#define CBNZx(Rt, imm19) EMIT(CB_gen(1, 1, ((imm19)>>2)&0x7FFFF, Rt))
#define CBNZw(Rt, imm19) EMIT(CB_gen(0, 1, ((imm19)>>2)&0x7FFFF, Rt))
#define CBZx(Rt, imm19) EMIT(CB_gen(1, 0, ((imm19)>>2)&0x7FFFF, Rt))
#define CBZw(Rt, imm19) EMIT(CB_gen(0, 0, ((imm19)>>2)&0x7FFFF, Rt))
// AND / ORR
#define LOGIC_gen(sf, opc, N, immr, imms, Rn, Rd) ((sf)<<31 | (opc)<<29 | 0b100100<<23 | (N)<<22 | (immr)<<16 | (imms)<<10 | (Rn)<<5 | (Rd))
#define ANDx_U13(Rd, Rn, imm13) EMIT(LOGIC_gen(1, 0b00, ((imm13)>>12)&1, (imm13)&0b111111, ((imm13)>>6)&0b111111, Rn, Rd))
#define ANDw_U12(Rd, Rn, imm12) EMIT(LOGIC_gen(0, 0b00, 0, (imm12)&0b111111, ((imm12)>>6)&0b111111, Rn, Rd))
#define ANDSx_U13(Rd, Rn, imm13) EMIT(LOGIC_gen(1, 0b11, ((imm13)>>12)&1, (imm13)&0b111111, ((imm13)>>6)&0b111111, Rn, Rd))
#define ANDSw_U12(Rd, Rn, imm12) EMIT(LOGIC_gen(0, 0b11, 0, (imm12)&0b111111, ((imm12)>>6)&0b111111, Rn, Rd))
#define ORRx_U13(Rd, Rn, imm13) EMIT(LOGIC_gen(1, 0b01, ((imm13)>>12)&1, (imm13)&0b111111, ((imm13)>>6)&0b111111, Rn, Rd))
#define ORRw_U12(Rd, Rn, imm12) EMIT(LOGIC_gen(0, 0b01, 0, (imm12)&0b111111, ((imm12)>>6)&0b111111, Rn, Rd))
#define LOGIC_REG_gen(sf, opc, shift, N, Rm, imm6, Rn, Rd) ((sf)<<31 | (opc)<<29 | 0b01010<<24 | (shift)<<22 | (N)<<21 | (Rm)<<16 | (imm6)<<10 | (Rn)<<5 | (Rd))
#define ANDx_REG(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(1, 0b00, 0b00, 0, Rm, 0, Rn, Rd))
#define ANDw_REG(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(0, 0b00, 0b00, 0, Rm, 0, Rn, Rd))
#define ANDSx_REG(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(1, 0b11, 0b00, 0, Rm, 0, Rn, Rd))
#define ANDSw_REG(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(0, 0b11, 0b00, 0, Rm, 0, Rn, Rd))
#define ORRx_REG(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(1, 0b01, 0b00, 0, Rm, 0, Rn, Rd))
#define ORRx_REG_LSL(Rd, Rn, Rm, lsl) EMIT(LOGIC_REG_gen(1, 0b01, 0b00, 0, Rm, lsl, Rn, Rd))
#define ORRw_REG(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(0, 0b01, 0b00, 0, Rm, 0, Rn, Rd))
#define ORNx_REG(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(1, 0b01, 0b00, 1, Rm, 0, Rn, Rd))
#define ORNx_REG_LSL(Rd, Rn, Rm, lsl) EMIT(LOGIC_REG_gen(1, 0b01, 0b00, 1, Rm, lsl, Rn, Rd))
#define ORNw_REG(Rd, Rn, Rm) EMIT(LOGIC_REG_gen(0, 0b01, 0b00, 1, Rm, 0, Rn, Rd))
#define MOVx(Rd, Rm) ORRx_REG(Rd, xZR, Rm)
#define MOVx_LSL(Rd, Rm, lsl) ORRx_REG_LSL(Rd, xZR, Rm, lsl)
#define MOVw(Rd, Rm) ORRw_REG(Rd, xZR, Rm)
#define MVNx(Rd, Rm) ORNx_REG(Rd, xZR, Rm)
#define MVNx_LSL(Rd, Rm, lsl) ORNx_REG_LSL(Rd, xZR, Rm, lsl)
#define MVNw(Rd, Rm) ORNw_REG(Rd, xZR, Rm)
#define MOV_frmSP(Rd) ADDx_U12(Rd, xSP, 0)
#define MOV_toSP(Rm) ADDx_U12(xSP, Rm, 0)
// BFI
#define BFM_gen(sf, opc, N, immr, imms, Rn, Rd) ((sf)<<31 | (opc)<<29 | 0b100110<<23 | (N)<<22 | (immr)<<16 | (imms)<<10 | (Rn)<<5 | (Rd))
#define BFMx(Rd, Rn, immr, imms) EMIT(BFM_gen(1, 0b01, 1, immr, imms, Rn, Rd))
#define BFMw(Rd, Rn, immr, imms) EMIT(BFM_gen(0, 0b01, 0, immr, imms, Rn, Rd))
#define BFIx(Rd, Rn, lsb, width) BFMx(Rd, Rn, (64-(lsb))&63, (width)-1)
#define BFIw(Rd, Rn, lsb, width) BFMw(Rd, Rn, (32-(lsb))&31, (width)-1)
#define BFCx(Rd, lsb, width) BFMx(Rd, xZR, (64-(lsb))&63, (width)-1)
#define BFCw(Rd, lsb, width) BFMw(Rd, xZR, (32-(lsb))&31, (width)-1)
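// BFI sketch: BFIx(Rd, Rn, lsb, width) copies bits [0..width-1] of Rn into
// bits [lsb..lsb+width-1] of Rd, encoded as BFM with immr=(64-lsb)&63 and
// imms=width-1; e.g. BFIx(xFlags, x1, 2, 1) would insert a single flag bit
// at position 2 of xFlags (example values only).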
// UBFX
#define UBFM_gen(sf, N, immr, imms, Rn, Rd) ((sf)<<31 | 0b10<<29 | 0b100110<<23 | (N)<<22 | (immr)<<16 | (imms)<<10 | (Rn)<<5 | (Rd))
#define UBFXx(Rd, Rn, lsb, width) EMIT(UBFM_gen(1, 1, (lsb), (lsb)+(width)-1, Rn, Rd))
#define UBFXw(Rd, Rn, lsb, width) EMIT(UBFM_gen(0, 1, (lsb), (lsb)+(width)-1, Rn, Rd))
#define UXTBx(Rd, Rn) EMIT(UBFM_gen(1, 1, 0, 7, Rn, Rd))
#define UXTBw(Rd, Rn) EMIT(UBFM_gen(0, 1, 0, 7, Rn, Rd))
#define UXTHx(Rd, Rn) EMIT(UBFM_gen(1, 1, 0, 15, Rn, Rd))
#define UXTHw(Rd, Rn) EMIT(UBFM_gen(0, 1, 0, 15, Rn, Rd))
#define LSRx(Rd, Rn, shift) EMIT(UBFM_gen(1, 1, shift, 63, Rn, Rd))
#define LSRw(Rd, Rn, shift) EMIT(UBFM_gen(0, 0, shift, 31, Rn, Rd))
// LSRV
#define LSRV_gen(sf, Rm, op2, Rn, Rd) ((sf)<<31 | 0b11010110<<21 | (Rm)<<16 | 0b0010<<12 | (op2)<<10 | (Rn)<<5 | (Rd))
#define LSRx_REG(Rd, Rn, Rm) EMIT(LSRV_gen(1, Rm, 0b01, Rn, Rd))
#define LSRw_REG(Rd, Rn, Rm) EMIT(LSRV_gen(0, Rm, 0b01, Rn, Rd))
// MRS
#define MRS_gen(L, o0, op1, CRn, CRm, op2, Rt) (0b1101010100<<22 | (L)<<21 | 1<<20 | (o0)<<19 | (op1)<<16 | (CRn)<<12 | (CRm)<<8 | (op2)<<5 | (Rt))
// mrs x0, nzcv : 1101010100 1 1 1 011 0100 0010 000 00000 o0=1(op0=3), op1=0b011(3) CRn=0b0100(4) CRm=0b0010(2) op2=0
#define VMRS_nzcv(Rt) EMIT(MRS_gen(1, 1, 3, 4, 2, 0, Rt))
#define VMSR_nzcv(Rt) EMIT(MRS_gen(0, 1, 3, 4, 2, 0, Rt))
// mrs x0, fpcr : 1101010100 1 1 1 011 0100 0100 000 00000 o0=1(op0=3), op1=0b011(3) CRn=0b0100(4) CRm=0b0100(4) op2=0
#define VMRS(Rt) EMIT(MRS_gen(1, 1, 3, 4, 4, 0, Rt))
#define VMSR(Rt) EMIT(MRS_gen(0, 1, 3, 4, 4, 0, Rt))
// VLDR
#define VMEM_gen(size, opc, imm12, Rn, Rt) ((size)<<30 | 0b111<<27 | 1<<26 | 0b01<<24 | (opc)<<22 | (imm12)<<10 | (Rn)<<5 | (Rt))
// imm15 must be a multiple of 8 (the encoding stores imm15>>3)
#define VLDR64_U12(Dt, Rn, imm15) EMIT(VMEM_gen(0b11, 0b01, ((imm15)>>3)&0xfff, Rn, Dt))
// imm16 must be a multiple of 16 (the encoding stores imm16>>4)
#define VLDR128_U12(Qt, Rn, imm16) EMIT(VMEM_gen(0b00, 0b11, ((imm16)>>4)&0xfff, Rn, Qt))
// imm15 must be a multiple of 8 (the encoding stores imm15>>3)
#define VSTR64_U12(Dt, Rn, imm15) EMIT(VMEM_gen(0b11, 0b00, ((imm15)>>3)&0xfff, Rn, Dt))
// imm16 must be a multiple of 16 (the encoding stores imm16>>4)
#define VSTR128_U12(Qt, Rn, imm16) EMIT(VMEM_gen(0b00, 0b10, ((imm16)>>4)&0xfff, Rn, Qt))
#define VMEMW_gen(size, opc, imm9, op2, Rn, Rt) ((size)<<30 | 0b111<<27 | 1<<26 | (opc)<<22 | (imm9)<<12 | (op2)<<10 | 0b01<<10 | (Rn)<<5 | (Rt))
#define VLDR64_S9_postindex(Rt, Rn, imm9) EMIT(VMEMW_gen(0b11, 0b01, (imm9)&0x1ff, 0b01, Rn, Rt))
#define VLDR64_S9_preindex(Rt, Rn, imm9) EMIT(VMEMW_gen(0b11, 0b01, (imm9)&0x1ff, 0b11, Rn, Rt))
#define VLDR128_S9_postindex(Rt, Rn, imm9) EMIT(VMEMW_gen(0b11, 0b11, (imm9)&0x1ff, 0b01, Rn, Rt))
#define VLDR128_S9_preindex(Rt, Rn, imm9) EMIT(VMEMW_gen(0b11, 0b11, (imm9)&0x1ff, 0b11, Rn, Rt))
#define VSTR64_S9_postindex(Rt, Rn, imm9) EMIT(VMEMW_gen(0b11, 0b00, (imm9)&0x1ff, 0b01, Rn, Rt))
#define VSTR64_S9_preindex(Rt, Rn, imm9) EMIT(VMEMW_gen(0b11, 0b00, (imm9)&0x1ff, 0b11, Rn, Rt))
#define VSTR128_S9_postindex(Rt, Rn, imm9) EMIT(VMEMW_gen(0b11, 0b10, (imm9)&0x1ff, 0b01, Rn, Rt))
#define VSTR128_S9_preindex(Rt, Rn, imm9) EMIT(VMEMW_gen(0b11, 0b10, (imm9)&0x1ff, 0b11, Rn, Rt))
#define VMEM_REG_gen(size, opc, Rm, option, S, Rn, Rt) ((size)<<30 | 0b111<<27 | 1<<26 | (opc)<<22 | 1<<21 | (Rm)<<16 | (option)<<13 | (S)<<12 | 0b10<<10 | (Rn)<<5 | (Rt))
#define VLDR64_REG(Dt, Rn, Rm) EMIT(VMEM_REG_gen(0b11, 0b01, Rm, 0b011, 0, Rn, Dt))
#define VLDR64_REG_LSL3(Dt, Rn, Rm) EMIT(VMEM_REG_gen(0b11, 0b01, Rm, 0b011, 1, Rn, Dt))
#define VLDR128_REG(Qt, Rn, Rm) EMIT(VMEM_REG_gen(0b00, 0b11, Rm, 0b011, 0, Rn, Qt))
#define VLDR128_REG_LSL4(Qt, Rn, Rm) EMIT(VMEM_REG_gen(0b00, 0b11, Rm, 0b011, 1, Rn, Qt))
#define VSTR64_REG(Dt, Rn, Rm) EMIT(VMEM_REG_gen(0b11, 0b00, Rm, 0b011, 0, Rn, Dt))
#define VSTR64_REG_LSL3(Dt, Rn, Rm) EMIT(VMEM_REG_gen(0b11, 0b00, Rm, 0b011, 1, Rn, Dt))
#define VSTR128_REG(Qt, Rn, Rm) EMIT(VMEM_REG_gen(0b00, 0b10, Rm, 0b011, 0, Rn, Qt))
#define VSTR128_REG_LSL4(Qt, Rn, Rm) EMIT(VMEM_REG_gen(0b00, 0b10, Rm, 0b011, 1, Rn, Qt))
#endif //__ARM64_EMITTER_H__


@@ -66,11 +66,11 @@ arm64_lock_write_dd:
arm64_lock_xchg:
// address is x0, value is x1, return old value in x0
-ldaxr w2, [x0]
-stlxr w3, w1, [x0]
+ldaxr x2, [x0]
+stlxr w3, x1, [x0]
cmp w3, #1
beq arm64_lock_xchg
-mov w0, w2
+mov x0, x2
ret
arm64_lock_storeifnull:


@@ -58,7 +58,7 @@ dynablocklist_t* NewDynablockList(uintptr_t text, int textsz, int direct)
void FreeDynablock(dynablock_t* db)
{
if(db) {
-dynarec_log(LOG_DEBUG, "FreeDynablock(%p), db->block=%p x64=%p:%p father=%p, with %d son(s) already gone=%d\n", db, db->block, db->x64_addr, db->x64_addr+db->x64_size, db->father, db->sons_size, db->gone);
+dynarec_log(LOG_DEBUG, "FreeDynablock(%p), db->block=%p x64=%p:%p parent=%p, father=%p, with %d son(s) already gone=%d\n", db, db->block, db->x64_addr, db->x64_addr+db->x64_size, db->parent, db->father, db->sons_size, db->gone);
if(db->gone)
return; // already in the process of deletion!
db->done = 0;
@@ -327,7 +327,7 @@ static dynablock_t* internalDBGetBlock(x64emu_t* emu, uintptr_t addr, uintptr_t
pthread_mutex_lock(&my_context->mutex_dyndump);
// fill the block
block->x64_addr = (void*)addr;
-if(0/*!FillBlock64(block, filladdr)*/) {
+if(FillBlock64(block, filladdr)) {
void* old = (void*)arm64_lock_xchg(&dynablocks->direct[addr-dynablocks->text], 0);
if(old!=block && old) {// put it back in place, strange things are happening here!
dynarec_log(LOG_INFO, "Warning, a wild block appeared at %p: %p\n", (void*)addr, old);


@@ -310,7 +310,7 @@ void arm_pass1(dynarec_arm_t* dyn, uintptr_t addr);
void arm_pass2(dynarec_arm_t* dyn, uintptr_t addr);
void arm_pass3(dynarec_arm_t* dyn, uintptr_t addr);
-void* FillBlock(dynablock_t* block, uintptr_t addr) {
+void* FillBlock64(dynablock_t* block, uintptr_t addr) {
if(addr>=box64_nodynarec_start && addr<box64_nodynarec_end)
return NULL;
// init the helper
@@ -320,7 +320,6 @@ void* FillBlock(dynablock_t* block, uintptr_t addr) {
if(!helper.size) {
dynarec_log(LOG_DEBUG, "Warning, null-sized dynarec block (%p)\n", (void*)addr);
block->done = 1;
-free(helper.next);
return (void*)block;
}
helper.cap = helper.size+3; // needs epilog handling

src/dynarec/dynarec_arm64_00.c (new executable file, 54 lines)

@@ -0,0 +1,54 @@
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <pthread.h>
#include <errno.h>
#include <signal.h>
#include "debug.h"
#include "box64context.h"
#include "dynarec.h"
#include "emu/x64emu_private.h"
#include "emu/x64run_private.h"
#include "x64run.h"
#include "x64emu.h"
#include "box64stack.h"
#include "callback.h"
#include "bridge.h"
#include "emu/x64run_private.h"
#include "x64trace.h"
#include "dynarec_arm64.h"
#include "dynarec_arm64_private.h"
#include "arm64_printer.h"
#include "dynarec_arm64_functions.h"
#include "dynarec_arm64_helper.h"
uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, int* ok, int* need_epilog)
{
uint8_t nextop, opcode;
uint8_t gd, ed;
int8_t i8;
int32_t i32, j32, tmp;
uint8_t u8;
uint8_t gb1, gb2, eb1, eb2;
uint32_t u32;
uint8_t wback, wb1, wb2;
int fixedaddress;
opcode = F8;
MAYUSE(eb1);
MAYUSE(eb2);
MAYUSE(tmp);
MAYUSE(j32);
switch(opcode) {
default:
DEFAULT;
}
return addr;
}


@@ -193,23 +193,12 @@ int fpu_get_scratch_single(dynarec_arm_t* dyn)
// Get a FPU double scratch reg
int fpu_get_scratch_double(dynarec_arm_t* dyn)
{
-int i = (dyn->fpu_scratch+1)&(~1);
-dyn->fpu_scratch = i+2;
-return i/2; // return a Dx
+return dyn->fpu_scratch++; // return a Dx (same as Sx)
}
// Get a FPU quad scratch reg
int fpu_get_scratch_quad(dynarec_arm_t* dyn)
{
-if(dyn->fpu_scratch>4) {
-if(dyn->fpu_extra_qscratch) {
-dynarec_log(LOG_NONE, "Warning, Extra QScratch slot taken and need another one!\n");
-} else
-dyn->fpu_extra_qscratch = fpu_get_reg_quad(dyn);
-return dyn->fpu_extra_qscratch;
-}
-int i = (dyn->fpu_scratch+3)&(~3);
-dyn->fpu_scratch = i+4;
-return i/2; // Return a Dx, not a Qx
+return dyn->fpu_scratch++; // return a Qx (same as Dx or Sx)
}
// Reset scratch regs counter
void fpu_reset_scratch(dynarec_arm_t* dyn)
@@ -241,15 +230,15 @@ void fpu_free_reg_double(dynarec_arm_t* dyn, int reg)
int fpu_get_reg_quad(dynarec_arm_t* dyn)
{
int i=0;
-while (dyn->fpuused[i] || dyn->fpuused[i+1]) i+=2;
-dyn->fpuused[i] = dyn->fpuused[i+1] = 1;
-return i+FPUFIRST; // Return a Dx, not a Qx
+while (dyn->fpuused[i]) ++i;
+dyn->fpuused[i] = 1;
+return i+FPUFIRST; // return a Qx, it's the same as Dx on aarch64
}
// Free a FPU quad reg
void fpu_free_reg_quad(dynarec_arm_t* dyn, int reg)
{
int i=reg-FPUFIRST;
-dyn->fpuused[i] = dyn->fpuused[i+1] = 0;
+dyn->fpuused[i] = 0;
}
// Reset fpu regs counter
void fpu_reset_reg(dynarec_arm_t* dyn)


@@ -0,0 +1,958 @@
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <pthread.h>
#include <errno.h>
#include "debug.h"
#include "box64context.h"
#include "dynarec.h"
#include "emu/x64emu_private.h"
#include "emu/x64run_private.h"
#include "x64run.h"
#include "x64emu.h"
#include "box64stack.h"
#include "callback.h"
#include "emu/x64run_private.h"
#include "x64trace.h"
#include "dynarec_arm64.h"
#include "dynarec_arm64_private.h"
#include "dynablock_private.h"
#include "arm64_printer.h"
#include "../tools/bridge_private.h"
#include "custommem.h"
#include "dynarec_arm64_functions.h"
#include "dynarec_arm64_helper.h"
/* setup r2 to the address pointed to by ED; fixaddress is an optional extra delta in the range [-absmax, +absmax] (negative only when s is set), with delta&mask==0, to be added to ed for the LDR/STR */
uintptr_t geted(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, int* fixaddress, int absmax, uint32_t mask, rex_t rex, int s, int delta)
{
uint8_t ret = x2;
uint8_t scratch = x2;
*fixaddress = 0;
if(hint>0) ret = hint;
if(hint>0 && hint<xRAX) scratch = hint;
int absmin = 0;
if(s) absmin=-absmax;
MAYUSE(scratch);
if(!(nextop&0xC0)) {
if((nextop&7)==4) {
uint8_t sib = F8;
int sib_reg = (sib>>3)&7;
if((sib&0x7)==5) {
int64_t tmp = F32S64;
if (sib_reg!=4) {
if(tmp && ((tmp<absmin) || (tmp>absmax) || (tmp&mask))) {
MOV64x(scratch, tmp);
ADDx_REG_LSL(ret, scratch, xRAX+sib_reg+(rex.x<<3), (sib>>6));
} else {
MOVx_LSL(ret, xRAX+sib_reg+(rex.x<<3), (sib>>6));
*fixaddress = tmp;
}
} else {
MOV64x(ret, tmp);
}
} else {
if (sib_reg!=4) {
ADDx_REG_LSL(ret, xRAX+(sib&0x7)+(rex.b<<3), xRAX+sib_reg+(rex.x<<3), (sib>>6));
} else {
ret = xRAX+(sib&0x7)+(rex.b<<3);
}
}
} else if((nextop&7)==5) {
uint64_t tmp = F32S64;
MOV64x(ret, tmp);
MOV64x(xRIP, addr+delta);
ADDx_REG(ret, ret, xRIP);
} else {
ret = xRAX+(nextop&7)+(rex.b<<3);
}
} else {
int64_t i64;
uint8_t sib = 0;
int sib_reg = 0;
if((nextop&7)==4) {
sib = F8;
sib_reg = (sib>>3)&7;
}
if(nextop&0x80)
i64 = F32S;
else
i64 = F8S;
if(i64==0 || ((i64>=absmin) && (i64<=absmax) && !(i64&mask))) {
*fixaddress = i64;
if((nextop&7)==4) {
if (sib_reg!=4) {
ADDx_REG_LSL(ret, xRAX+(sib&0x07)+(rex.b<<3), xRAX+sib_reg+(rex.x<<3), (sib>>6));
} else {
ret = xRAX+(sib&0x07)+(rex.b<<3);
}
} else
ret = xRAX+(nextop&0x07)+(rex.b<<3);
} else {
int64_t sub = (i64<0)?1:0;
if(sub) i64 = -i64;
if(i64<0x1000) {
if((nextop&7)==4) {
if (sib_reg!=4) {
ADDx_REG_LSL(scratch, xRAX+(sib&0x07)+(rex.b<<3), xRAX+sib_reg+(rex.x<<3), (sib>>6));
} else {
scratch = xRAX+(sib&0x07)+(rex.b<<3);
}
} else
scratch = xRAX+(nextop&0x07)+(rex.b<<3);
if(sub) {
SUBx_U12(ret, scratch, i64);
} else {
ADDx_U12(ret, scratch, i64);
}
} else {
MOV64x(scratch, i64);
if((nextop&7)==4) {
if (sib_reg!=4) {
if(sub) {
SUBx_REG(scratch, xRAX+(sib&0x07)+(rex.b<<3), scratch);
} else {
ADDx_REG(scratch, scratch, xRAX+(sib&0x07)+(rex.b<<3));
}
ADDx_REG_LSL(ret, scratch, xRAX+sib_reg+(rex.x<<3), (sib>>6));
} else {
PASS3(int tmp = xRAX+(sib&0x07)+(rex.b<<3));
if(sub) {
SUBx_REG(ret, tmp, scratch);
} else {
ADDx_REG(ret, tmp, scratch);
}
}
} else {
PASS3(int tmp = xRAX+(nextop&0x07)+(rex.b<<3));
if(sub) {
SUBx_REG(ret, tmp, scratch);
} else {
ADDx_REG(ret, tmp, scratch);
}
}
}
}
}
*ed = ret;
return addr;
}
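/* Usage sketch (hypothetical caller, mirroring the GETED macros of
   dynarec_arm64_helper.h): for a modrm memory operand,
       nextop = F8;
       addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff, 0, rex, 0, 0);
       LDRx_U12(x1, wback, fixedaddress);
   leaves the base register in wback and a pre-validated immediate offset
   in fixedaddress, so a single LDR/STR can finish the access. */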
/* setup r2 to the address pointed to by ED, with r3 as scratch; fixaddress is an optional extra delta in the range [-absmax, +absmax], with delta&mask==0, to be added to ed for the LDR/STR */
uintptr_t geted16(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, int* fixaddress, int absmax, uint32_t mask, int s)
{
uint8_t ret = x2;
uint8_t scratch = x3;
*fixaddress = 0;
if(hint>0) ret = hint;
if(scratch==ret) scratch = x2;
MAYUSE(scratch);
uint32_t m = nextop&0xC7;
uint32_t n = (m>>6)&3;
int32_t offset = 0;
int absmin = 0;
if(s) absmin = -absmax;
if(!n && m==6) {
offset = F16;
MOVZw(ret, offset);
} else {
switch(n) {
case 0: offset = 0; break;
case 1: offset = F8S; break;
case 2: offset = F16S; break;
}
if(offset && (offset>absmax || offset<absmin || (offset&mask))) {
*fixaddress = offset;
offset = 0;
}
switch(m&7) {
case 0: //R_BX + R_SI
UXTHx(ret, xRBX);
UXTHx(scratch, xRSI);
ADDx_REG(ret, ret, scratch);
break;
case 1: //R_BX + R_DI
UXTHx(ret, xRBX);
UXTHx(scratch, xRDI);
ADDx_REG(ret, ret, scratch);
break;
case 2: //R_BP + R_SI
UXTHx(ret, xRBP);
UXTHx(scratch, xRSI);
ADDx_REG(ret, ret, scratch);
break;
case 3: //R_BP + R_DI
UXTHx(ret, xRBP);
UXTHx(scratch, xRDI);
ADDx_REG(ret, ret, scratch);
break;
case 4: //R_SI
UXTHx(ret, xRSI);
break;
case 5: //R_DI
UXTHx(ret, xRDI);
break;
case 6: //R_BP
UXTHx(ret, xRBP);
break;
case 7: //R_BX
UXTHx(ret, xRBX);
break;
}
if(offset) {
if(offset<0 && offset>-0x1000) {
SUBx_U12(ret, ret, -offset);
} else if(offset>0 && offset<0x1000) {
ADDx_U12(ret, ret, offset);
} else {
MOV64x(scratch, offset);
ADDx_REG(ret, ret, scratch);
}
}
}
*ed = ret;
return addr;
}
void jump_to_epilog(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst)
{
MESSAGE(LOG_DUMP, "Jump to epilog\n");
if(reg) {
if(reg!=xRIP) {
MOVx(xRIP, reg);
}
} else {
MOV64x(xRIP, ip);
}
MOV64x(x2, (uintptr_t)arm64_epilog);
BR(x2);
}
void jump_to_next(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst)
{
MESSAGE(LOG_DUMP, "Jump to next\n");
if(reg) {
if(reg!=xRIP) {
MOVx(xRIP, reg);
}
uintptr_t tbl = getJumpTable64();
MOV64x(x2, tbl);
UBFXx(x3, xRIP, 48, JMPTABL_SHIFT); // 4-level jump table walk: JMPTABL_SHIFT bits of xRIP per level
LDRx_REG_LSL3(x2, x2, x3);
UBFXx(x3, xRIP, 32, JMPTABL_SHIFT);
LDRx_REG_LSL3(x2, x2, x3);
UBFXx(x3, xRIP, 16, JMPTABL_SHIFT);
LDRx_REG_LSL3(x2, x2, x3);
LDRx_REG_UXTW(x3, x2, xRIP);
} else {
uintptr_t p = getJumpTableAddress64(ip);
MOV64x(x2, p);
MOV64x(xRIP, ip);
LDRx_U12(x3, x2, 0);
}
MOVx(x1, xRIP);
#ifdef HAVE_TRACE
//MOVx(x2, 15); no access to PC reg
#endif
BR(x3);
}
void ret_to_epilog(dynarec_arm_t* dyn, int ninst)
{
MESSAGE(LOG_DUMP, "Ret next\n");
POP1(xRIP);
uintptr_t tbl = getJumpTable64();
MOV64x(x2, tbl);
UBFXx(x3, xRIP, 48, JMPTABL_SHIFT);
LDRx_REG_LSL3(x2, x2, x3);
UBFXx(x3, xRIP, 32, JMPTABL_SHIFT);
LDRx_REG_LSL3(x2, x2, x3);
UBFXx(x3, xRIP, 16, JMPTABL_SHIFT);
LDRx_REG_LSL3(x2, x2, x3);
LDRx_REG_UXTW(x2, x2, xRIP);
BR(x2);
}
void retn_to_epilog(dynarec_arm_t* dyn, int ninst, int n)
{
MESSAGE(LOG_DUMP, "Retn epilog\n");
POP1(xRIP);
if(n>0xfff) {
MOV32w(w1, n);
ADDx_REG(xRSP, xRSP, x1);
} else {
ADDx_U12(xRSP, xRSP, n);
}
uintptr_t tbl = getJumpTable64();
MOV64x(x2, tbl);
UBFXx(x3, xRIP, 48, JMPTABL_SHIFT);
LDRx_REG_LSL3(x2, x2, x3);
UBFXx(x3, xRIP, 32, JMPTABL_SHIFT);
LDRx_REG_LSL3(x2, x2, x3);
UBFXx(x3, xRIP, 16, JMPTABL_SHIFT);
LDRx_REG_LSL3(x2, x2, x3);
LDRx_REG_UXTW(x2, x2, xRIP);
BR(x2);
}
void iret_to_epilog(dynarec_arm_t* dyn, int ninst)
{
MESSAGE(LOG_DUMP, "IRet epilog\n");
// POP IP
POP1(xRIP);
// POP CS
POP1(x2);
STRH_U12(x2, xEmu, offsetof(x64emu_t, segs[_CS]));
MOVZw(x1, 0);
STRx_U12(x1, xEmu, offsetof(x64emu_t, segs_serial[_CS]));
// POP EFLAGS
POP1(xFlags);
MOV32w(x1, 0x3F7FD7);
ANDx_REG(xFlags, xFlags, x1);
ORRx_U13(xFlags, xFlags, 2);
SET_DFNONE(x1);
// Ret....
MOV64x(x2, (uintptr_t)arm64_epilog); // epilog on purpose, CS might have changed!
BR(x2);
}
void call_c(dynarec_arm_t* dyn, int ninst, void* fnc, int reg, int ret, int saveflags)
{
if(ret!=-2) {
STRx_S9_preindex(xEmu, xSP, -16); // ARM64 stack needs to be 16byte aligned
}
fpu_pushcache(dyn, ninst, reg);
if(saveflags) {
STRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags));
}
MOV64x(reg, (uintptr_t)fnc);
BLR(reg);
fpu_popcache(dyn, ninst, reg);
if(ret>=0) {
MOVx(ret, xEmu);
}
if(ret!=-2) {
LDRx_S9_postindex(xEmu, xSP, 16);
}
if(saveflags) {
LDRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags));
}
SET_NODF();
}
void grab_tlsdata(dynarec_arm_t* dyn, uintptr_t addr, int ninst, int reg)
{
MESSAGE(LOG_DUMP, "Get TLSData\n");
int32_t j32;
MAYUSE(j32);
int t1 = x1, t2 = x4;
if(reg==t1) ++t1;
if(reg==t2) ++t2;
LDRx_U12(t1, xEmu, offsetof(x64emu_t, context));
LDRx_U12(t2, xEmu, offsetof(x64emu_t, segs_serial[_GS])); // complete check here
LDRx_U12(t1, t1, offsetof(box64context_t, sel_serial));
LDRx_U12(reg, xEmu, offsetof(x64emu_t, segs_offs[_GS])); // no condition LDR
SUBx_REG(t1, t1, t2);
CBZx_MARKSEG(t1);
MOVZw(x1, _GS);
call_c(dyn, ninst, GetSegmentBaseEmu, t2, reg, 1);
MARKSEG;
MESSAGE(LOG_DUMP, "----TLSData\n");
}
void grab_fsdata(dynarec_arm_t* dyn, uintptr_t addr, int ninst, int reg)
{
int32_t j32;
MAYUSE(j32);
MESSAGE(LOG_DUMP, "Get FS: Offset\n");
int t1 = x1, t2 = x4;
if(reg==t1) ++t1;
if(reg==t2) ++t2;
LDRx_U12(t2, xEmu, offsetof(x64emu_t, segs_serial[_FS]));// fast check here
LDRx_U12(reg, xEmu, offsetof(x64emu_t, segs_offs[_FS]));
CBZx_MARKSEG(t2);
MOVZw(x1, _FS);
call_c(dyn, ninst, GetSegmentBaseEmu, t2, reg, 1);
MARKSEG;
MESSAGE(LOG_DUMP, "----FS: Offset\n");
}
// x87 stuff
static void x87_reset(dynarec_arm_t* dyn, int ninst)
{
#if STEP > 1
for (int i=0; i<8; ++i)
dyn->x87cache[i] = -1;
dyn->x87stack = 0;
#endif
}
void x87_stackcount(dynarec_arm_t* dyn, int ninst, int scratch)
{
#if STEP > 1
if(!dyn->x87stack)
return;
MESSAGE(LOG_DUMP, "\tSynch x87 Stackcount (%d)\n", dyn->x87stack);
int a = dyn->x87stack;
// Add x87stack to emu fpu_stack
LDRx_U12(scratch, xEmu, offsetof(x64emu_t, fpu_stack));
if(a>0) {
ADDx_U12(scratch, scratch, a);
} else {
SUBx_U12(scratch, scratch, -a);
}
STRx_U12(scratch, xEmu, offsetof(x64emu_t, fpu_stack));
// Sub x87stack from top, masking with 7
LDRx_U12(scratch, xEmu, offsetof(x64emu_t, top));
if(a>0) {
SUBx_U12(scratch, scratch, a);
} else {
ADDx_U12(scratch, scratch, -a);
}
ANDw_U12(scratch, scratch, 7);
STRx_U12(scratch, xEmu, offsetof(x64emu_t, top));
// reset x87stack
dyn->x87stack = 0;
MESSAGE(LOG_DUMP, "\t------x87 Stackcount\n");
#endif
}
int x87_do_push(dynarec_arm_t* dyn, int ninst)
{
#if STEP > 1
dyn->x87stack+=1;
// move all regs in cache, and find a free one
int ret = -1;
for(int i=0; i<8; ++i)
if(dyn->x87cache[i]!=-1)
++dyn->x87cache[i];
else if(ret==-1) {
dyn->x87cache[i] = 0;
ret=dyn->x87reg[i]=fpu_get_reg_double(dyn);
}
return ret;
#else
return 0;
#endif
}
void x87_do_push_empty(dynarec_arm_t* dyn, int ninst, int s1)
{
#if STEP > 1
dyn->x87stack+=1;
// move all regs in cache
for(int i=0; i<8; ++i)
if(dyn->x87cache[i]!=-1)
++dyn->x87cache[i];
if(s1)
x87_stackcount(dyn, ninst, s1);
#endif
}
void x87_do_pop(dynarec_arm_t* dyn, int ninst)
{
#if STEP > 1
dyn->x87stack-=1;
// move all regs in cache, popping ST0
for(int i=0; i<8; ++i)
if(dyn->x87cache[i]!=-1) {
--dyn->x87cache[i];
if(dyn->x87cache[i]==-1) {
fpu_free_reg_double(dyn, dyn->x87reg[i]);
dyn->x87reg[i] = -1;
}
}
#endif
}
static void x87_purgecache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3)
{
#if STEP > 1
int ret = 0;
for (int i=0; i<8 && !ret; ++i)
if(dyn->x87cache[i] != -1)
ret = 1;
if(!ret && !dyn->x87stack) // nothing to do
return;
MESSAGE(LOG_DUMP, "\tPurge x87 Cache and Synch Stackcount (%+d)\n", dyn->x87stack);
int a = dyn->x87stack;
if(a!=0) {
// reset x87stack
dyn->x87stack = 0;
// Add x87stack to emu fpu_stack
LDRx_U12(s2, xEmu, offsetof(x64emu_t, fpu_stack));
if(a>0) {
ADDx_U12(s2, s2, a);
} else {
SUBx_U12(s2, s2, -a);
}
STRx_U12(s2, xEmu, offsetof(x64emu_t, fpu_stack));
// Sub x87stack from top (masked with 7 while updating the tags below)
LDRx_U12(s2, xEmu, offsetof(x64emu_t, top));
// update tags (and top at the same time)
if(a>0) {
// new tag to fulls
MOVZw(s3, 0);
ADDx_U12(s1, xEmu, offsetof(x64emu_t, p_regs));
for (int i=0; i<a; ++i) {
SUBw_U12(s2, s2, 1);
ANDw_U12(s2, s2, 7); // (emu->top + st)&7
STRw_REG_LSL2(s3, s1, s2);
}
} else {
// empty tags
MOVZw(s3, 0b11);
ADDx_U12(s1, xEmu, offsetof(x64emu_t, p_regs));
for (int i=0; i<-a; ++i) {
STRw_REG_LSL2(s3, s1, s2);
ADDw_U12(s2, s2, 1);
ANDw_U12(s2, s2, 7); // (emu->top + st)&7
}
}
STRw_U12(s2, xEmu, offsetof(x64emu_t, top));
} else {
LDRw_U12(s2, xEmu, offsetof(x64emu_t, top));
}
if(ret!=0) {
// --- set values
// prepare offset to fpu => s1
ADDx_U12(s1, xEmu, offsetof(x64emu_t, mmx87));
// Get top
// loop all cache entries
for (int i=0; i<8; ++i)
if(dyn->x87cache[i]!=-1) {
ADDw_U12(s3, s2, dyn->x87cache[i]);
ANDw_U12(s3, s3, 7); // (emu->top + st)&7
VSTR64_REG_LSL3(dyn->x87reg[i], s1, s3);
fpu_free_reg_double(dyn, dyn->x87reg[i]);
dyn->x87reg[i] = -1;
dyn->x87cache[i] = -1;
}
}
#endif
}
#ifdef HAVE_TRACE
static void x87_reflectcache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3)
{
#if STEP > 1
x87_stackcount(dyn, ninst, s1);
int ret = 0;
for (int i=0; (i<8) && (!ret); ++i)
if(dyn->x87cache[i] != -1)
ret = 1;
if(!ret) // nothing to do
return;
// prepare offset to fpu => s1
ADDx_U12(s1, xEmu, offsetof(x64emu_t, mmx87));
// Get top
LDRw_U12(s2, xEmu, offsetof(x64emu_t, top));
// loop all cache entries
for (int i=0; i<8; ++i)
if(dyn->x87cache[i]!=-1) {
ADDw_U12(s3, s2, dyn->x87cache[i]);
ANDw_U12(s3, s3, 7); // (emu->top + i)&7
VLDR64_REG_LSL3(dyn->x87reg[i], s1, s3);
}
#endif
}
#endif
int x87_get_cache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st)
{
#if STEP > 1
// search in cache first
for (int i=0; i<8; ++i)
if(dyn->x87cache[i]==st)
return i;
MESSAGE(LOG_DUMP, "\tCreate x87 Cache for ST%d\n", st);
// get a free spot
int ret = -1;
for (int i=0; (i<8) && (ret==-1); ++i)
if(dyn->x87cache[i]==-1)
ret = i;
// found, setup and grab the value
dyn->x87cache[ret] = st;
dyn->x87reg[ret] = fpu_get_reg_double(dyn);
ADDx_U12(s1, xEmu, offsetof(x64emu_t, mmx87));
LDRw_U12(s2, xEmu, offsetof(x64emu_t, top));
int a = st - dyn->x87stack;
if(a) {
if(a<0) {
SUBw_U12(s2, s2, -a);
} else {
ADDw_U12(s2, s2, a);
}
ANDw_U12(s2, s2, 7);
}
VLDR64_REG_LSL3(dyn->x87reg[ret], s1, s2);
MESSAGE(LOG_DUMP, "\t-------x87 Cache for ST%d\n", st);
return ret;
#else
return 0;
#endif
}
int x87_get_st(dynarec_arm_t* dyn, int ninst, int s1, int s2, int a)
{
#if STEP > 1
return dyn->x87reg[x87_get_cache(dyn, ninst, s1, s2, a)];
#else
return 0;
#endif
}
void x87_refresh(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st)
{
#if STEP > 1
x87_stackcount(dyn, ninst, s1);
int ret = -1;
for (int i=0; (i<8) && (ret==-1); ++i)
if(dyn->x87cache[i] == st)
ret = i;
if(ret==-1) // nothing to do
return;
MESSAGE(LOG_DUMP, "\tRefresh x87 Cache for ST%d\n", st);
// prepare offset to fpu => s1
ADDx_U12(s1, xEmu, offsetof(x64emu_t, mmx87));
// Get top
LDRw_U12(s2, xEmu, offsetof(x64emu_t, top));
// Update
if(st) {
ADDw_U12(s2, s2, st);
ANDw_U12(s2, s2, 7); // (emu->top + i)&7
}
VLDR64_REG_LSL3(dyn->x87reg[ret], s1, s2);
MESSAGE(LOG_DUMP, "\t--------x87 Cache for ST%d\n", st);
#endif
}
void x87_forget(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st)
{
#if STEP > 1
x87_stackcount(dyn, ninst, s1);
int ret = -1;
for (int i=0; (i<8) && (ret==-1); ++i)
if(dyn->x87cache[i] == st)
ret = i;
if(ret==-1) // nothing to do
return;
MESSAGE(LOG_DUMP, "\tForget x87 Cache for ST%d\n", st);
// prepare offset to fpu => s1
ADDx_U12(s1, xEmu, offsetof(x64emu_t, mmx87));
// Get top
LDRw_U12(s2, xEmu, offsetof(x64emu_t, top));
// Update
if(st) {
ADDw_U12(s2, s2, st);
ANDw_U12(s2, s2, 7); // (emu->top + i)&7
}
VLDR64_REG_LSL3(dyn->x87reg[ret], s1, s2);
MESSAGE(LOG_DUMP, "\t--------x87 Cache for ST%d\n", st);
// and forget that cache
fpu_free_reg_double(dyn, dyn->x87reg[ret]);
dyn->x87cache[ret] = -1;
dyn->x87reg[ret] = -1;
#endif
}
void x87_reget_st(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st)
{
#if STEP > 1
// search in cache first
for (int i=0; i<8; ++i)
if(dyn->x87cache[i]==st) {
// refresh the value
MESSAGE(LOG_DUMP, "\tRefresh x87 Cache for ST%d\n", st);
ADDx_U12(s1, xEmu, offsetof(x64emu_t, mmx87));
LDRw_U12(s2, xEmu, offsetof(x64emu_t, top));
int a = st - dyn->x87stack;
if(a<0) {
SUBw_U12(s2, s2, -a);
} else {
ADDw_U12(s2, s2, a);
}
ANDw_U12(s2, s2, 7); // (emu->top + i)&7
VLDR64_REG_LSL3(dyn->x87reg[i], s1, s2);
MESSAGE(LOG_DUMP, "\t-------x87 Cache for ST%d\n", st);
// ok
return;
}
// Was not in the cache? creating it....
MESSAGE(LOG_DUMP, "\tCreate x87 Cache for ST%d\n", st);
// get a free spot
int ret = -1;
for (int i=0; (i<8) && (ret==-1); ++i)
if(dyn->x87cache[i]==-1)
ret = i;
// found, setup and grab the value
dyn->x87cache[ret] = st;
dyn->x87reg[ret] = fpu_get_reg_double(dyn);
ADDx_U12(s1, xEmu, offsetof(x64emu_t, mmx87));
LDRw_U12(s2, xEmu, offsetof(x64emu_t, top));
int a = st - dyn->x87stack;
if(a<0) {
SUBw_U12(s2, s2, -a);
} else {
ADDw_U12(s2, s2, a);
}
ANDw_U12(s2, s2, 7); // (emu->top + i)&7
VLDR64_REG_LSL3(dyn->x87reg[ret], s1, s2);
MESSAGE(LOG_DUMP, "\t-------x87 Cache for ST%d\n", st);
#endif
}
static int round_map[] = {0, 2, 1, 3}; // map x64 -> arm round flag
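// x86 rounding control (cw bits 11:10 / mxcsr bits 14:13): 00=nearest,
// 01=down(-inf), 10=up(+inf), 11=toward zero. ARM64 FPCR RMode: 00=nearest,
// 01=+inf, 10=-inf, 11=zero. Hence entries 1 and 2 are swapped in round_map.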
// Set rounding according to cw flags, return reg to restore flags
int x87_setround(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3)
{
LDRH_U12(s1, xEmu, offsetof(x64emu_t, cw));
UBFXx(s2, s1, 10, 2); // extract round...
MOV64x(s1, (uintptr_t)round_map);
LDRw_REG_LSL2(s2, s1, s2);
VMRS(s1); // get fpscr
MOVx(s3, s1);
BFIx(s1, s2, 22, 2); // inject new round
VMSR(s1); // put new fpscr
return s3;
}
// Set rounding according to mxcsr flags, return reg to restore flags
int sse_setround(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3)
{
LDRH_U12(s1, xEmu, offsetof(x64emu_t, mxcsr));
UBFXx(s2, s1, 13, 2); // extract round...
MOV64x(s1, (uintptr_t)round_map);
LDRw_REG_LSL2(s2, s1, s2);
VMRS(s1); // get fpscr
MOVx(s3, s1);
BFIx(s1, s2, 22, 2); // inject new round
VMSR(s1); // put new fpscr
return s3;
}
// Restore round flag
void x87_restoreround(dynarec_arm_t* dyn, int ninst, int s1)
{
VMSR(s1); // put back fpscr
}
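// Typical use (sketch): a rounding-sensitive op would bracket its code with
//   int old = x87_setround(dyn, ninst, x1, x2, x3);
//   ... emit the instruction(s) that must honor the x87 rounding mode ...
//   x87_restoreround(dyn, ninst, old);
// where old is the scratch (s3) holding the saved fpscr.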
// MMX helpers
static void mmx_reset(dynarec_arm_t* dyn, int ninst)
{
#if STEP > 1
for (int i=0; i<8; ++i)
dyn->mmxcache[i] = -1;
#endif
}
// get neon register for a MMX reg, create the entry if needed
int mmx_get_reg(dynarec_arm_t* dyn, int ninst, int s1, int a)
{
#if STEP > 1
if(dyn->mmxcache[a]!=-1)
return dyn->mmxcache[a];
int ret = dyn->mmxcache[a] = fpu_get_reg_double(dyn);
VLDR64_U12(ret, xEmu, offsetof(x64emu_t, mmx87[a]));
return ret;
#else
return 0;
#endif
}
// get neon register for a MMX reg, but don't try to synch it if it needed to be created
int mmx_get_reg_empty(dynarec_arm_t* dyn, int ninst, int s1, int a)
{
#if STEP > 1
if(dyn->mmxcache[a]!=-1)
return dyn->mmxcache[a];
int ret = dyn->mmxcache[a] = fpu_get_reg_double(dyn);
return ret;
#else
return 0;
#endif
}
// purge the MMX cache only(needs 3 scratch registers)
static void mmx_purgecache(dynarec_arm_t* dyn, int ninst, int s1)
{
#if STEP > 1
int old = -1;
for (int i=0; i<8; ++i)
if(dyn->mmxcache[i]!=-1) {
if (old==-1) {
MESSAGE(LOG_DUMP, "\tPurge MMX Cache ------\n");
++old;
}
VSTR64_U12(dyn->mmxcache[i], xEmu, offsetof(x64emu_t, mmx87[i]));
fpu_free_reg_double(dyn, dyn->mmxcache[i]);
dyn->mmxcache[i] = -1;
}
if(old!=-1) {
MESSAGE(LOG_DUMP, "\t------ Purge MMX Cache\n");
}
#endif
}
#ifdef HAVE_TRACE
static void mmx_reflectcache(dynarec_arm_t* dyn, int ninst, int s1)
{
#if STEP > 1
for (int i=0; i<8; ++i)
if(dyn->mmxcache[i]!=-1) {
VLDR64_U12(dyn->mmxcache[i], xEmu, offsetof(x64emu_t, mmx87[i]));
}
#endif
}
#endif
// SSE / SSE2 helpers
static void sse_reset(dynarec_arm_t* dyn, int ninst)
{
#if STEP > 1
for (int i=0; i<8; ++i)
dyn->ssecache[i] = -1;
#endif
}
// get neon register for a SSE reg, create the entry if needed
int sse_get_reg(dynarec_arm_t* dyn, int ninst, int s1, int a)
{
#if STEP > 1
if(dyn->ssecache[a]!=-1)
return dyn->ssecache[a];
int ret = dyn->ssecache[a] = fpu_get_reg_quad(dyn);
VLDR128_U12(ret, xEmu, offsetof(x64emu_t, xmm[a]));
return ret;
#else
return 0;
#endif
}
// get neon register for a SSE reg, but don't try to synch it if it needed to be created
int sse_get_reg_empty(dynarec_arm_t* dyn, int ninst, int s1, int a)
{
#if STEP > 1
if(dyn->ssecache[a]!=-1)
return dyn->ssecache[a];
int ret = dyn->ssecache[a] = fpu_get_reg_quad(dyn);
return ret;
#else
return 0;
#endif
}
// purge the SSE cache only(needs 3 scratch registers)
static void sse_purgecache(dynarec_arm_t* dyn, int ninst, int s1)
{
#if STEP > 1
int old = -1;
for (int i=0; i<8; ++i)
if(dyn->ssecache[i]!=-1) {
if (old==-1) {
MESSAGE(LOG_DUMP, "\tPurge SSE Cache ------\n");
++old;
}
VSTR128_U12(dyn->ssecache[i], xEmu, offsetof(x64emu_t, xmm[i]));
fpu_free_reg_quad(dyn, dyn->ssecache[i]);
dyn->ssecache[i] = -1;
}
if(old!=-1) {
MESSAGE(LOG_DUMP, "\t------ Purge SSE Cache\n");
}
#endif
}
#ifdef HAVE_TRACE
static void sse_reflectcache(dynarec_arm_t* dyn, int ninst, int s1)
{
#if STEP > 1
for (int i=0; i<8; ++i)
if(dyn->ssecache[i]!=-1) {
VSTR128_U12(dyn->ssecache[i], xEmu, offsetof(x64emu_t, xmm[i]));
}
#endif
}
#endif
void fpu_pushcache(dynarec_arm_t* dyn, int ninst, int s1)
{
#if STEP > 1
// save the FPU cache registers live across the call (on v8-v15 only the low 64 bits are callee-saved, so full regs are pushed)
int n=0;
for (int i=8; i<32; i++)
if(dyn->fpuused[i-8])
++n;
if(!n)
return;
MESSAGE(LOG_DUMP, "\tPush FPU Cache (%d)------\n", n);
SUBx_U12(xSP, xSP, n*16);
MOV_frmSP(s1);
for (int i=8; i<32; ++i) {
if(dyn->fpuused[i-8]) {
VSTR128_S9_postindex(i, s1, 16);
}
}
MESSAGE(LOG_DUMP, "\t------- Push FPU Cache (%d)\n", n);
#endif
}
void fpu_popcache(dynarec_arm_t* dyn, int ninst, int s1)
{
#if STEP > 1
// restore the FPU cache registers saved by fpu_pushcache (on v8..v15 only the low part is preserved across calls)
int n=0;
for (int i=8; i<32; i++)
if(dyn->fpuused[i-8])
++n;
if(!n)
return;
MESSAGE(LOG_DUMP, "\tPop FPU Cache (%d)------\n", n);
MOV_frmSP(s1);
for (int i=8; i<32; ++i) {
if(dyn->fpuused[i-8]) {
VLDR128_S9_postindex(i, s1, 16);
}
}
ADDx_U12(xSP, xSP, n*16);
MESSAGE(LOG_DUMP, "\t------- Pop FPU Cache (%d)\n", n);
#endif
}
void fpu_purgecache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3)
{
x87_purgecache(dyn, ninst, s1, s2, s3);
mmx_purgecache(dyn, ninst, s1);
sse_purgecache(dyn, ninst, s1);
fpu_reset_reg(dyn);
}
#ifdef HAVE_TRACE
void fpu_reflectcache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3)
{
x87_reflectcache(dyn, ninst, s1, s2, s3);
if(trace_emm)
mmx_reflectcache(dyn, ninst, s1);
if(trace_xmm)
sse_reflectcache(dyn, ninst, s1);
}
#endif
void fpu_reset(dynarec_arm_t* dyn, int ninst)
{
x87_reset(dyn, ninst);
mmx_reset(dyn, ninst);
sse_reset(dyn, ninst);
fpu_reset_reg(dyn);
}
void emit_pf(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4)
{
// PF: (((emu->x64emu_parity_tab[(res) / 32] >> ((res) % 32)) & 1) == 0)
ANDw_U12(s3, s1, 0xE0); // lsr 5 masking pre-applied
LSRw(s3, s3, 5);
MOV64x(s4, (uintptr_t)GetParityTab());
LDRw_REG_LSL2(s4, s4, s3);
ANDw_U12(s3, s1, 31);
LSRw_REG(s4, s4, s3);
MVNx(s4, s4);
BFIx(xFlags, s4, F_PF, 1);
}
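// Example (illustration): if the low byte of res is 0x03, two bits are set,
// so parity is even; the table lookup then yields a 0 bit, the MVN inverts
// it and the BFI writes PF=1 into xFlags, matching x86 PF semantics.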


@@ -0,0 +1,680 @@
#ifndef __DYNAREC_ARM64_HELPER_H__
#define __DYNAREC_ARM64_HELPER_H__
#if STEP == 0
#include "dynarec_arm64_pass0.h"
#elif STEP == 1
#include "dynarec_arm64_pass1.h"
#elif STEP == 2
#include "dynarec_arm64_pass2.h"
#elif STEP == 3
#include "dynarec_arm64_pass3.h"
#endif
#include "debug.h"
#include "arm64_emitter.h"
#include "../emu/x64primop.h"
#define F8 *(uint8_t*)(addr++)
#define F8S *(int8_t*)(addr++)
#define F16 *(uint16_t*)(addr+=2, addr-2)
#define F16S *(int16_t*)(addr+=2, addr-2)
#define F32 *(uint32_t*)(addr+=4, addr-4)
#define F32S *(int32_t*)(addr+=4, addr-4)
#define F32S64 (uint64_t)(int64_t)F32S
#define PK(a) *(uint8_t*)(addr+a)
#define PK16(a) *(uint16_t*)(addr+a)
#define PK32(a) *(uint32_t*)(addr+a)
#define PK64(a) *(uint64_t*)(addr+a)
#define PKip(a) *(uint8_t*)(ip+a)
// GETGD get x64 register in gd
#define GETGD gd = xRAX+((nextop&0x38)>>3)+(rex.r<<3)
//GETED can use r1 for ed, and r2 for wback. wback is 0 if ed is xRAX..xRDI
#define GETEDx(D) if((nextop&0xC0)==0xC0) { \
ed = xRAX+(nextop&7)+(rex.b<<3); \
wback = 0; \
} else { \
addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff, 0, rex, 0, D); \
LDRx_U12(x1, wback, fixedaddress); \
ed = x1; \
}
#define GETEDw(D) if((nextop&0xC0)==0xC0) { \
ed = xRAX+(nextop&7)+(rex.b<<3); \
wback = 0; \
} else { \
addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff, 0, rex, 0, D); \
LDRw_U12(x1, wback, fixedaddress); \
ed = x1; \
}
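// Usage sketch (hypothetical opcode handler, once the pass is ported):
//   nextop = F8;
//   GETGD;
//   GETEDx(0);
//   ADDx_REG(gd, gd, ed);   // e.g. a 64-bit ADD gd, ed
//   WBACK;
// (note that WBACK below still uses the arm32-era STR_IMM9 and is not ported yet)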
//GETEDH can use hint for ed, and r1 or r2 for wback (depending on hint). wback is 0 if ed is xEAX..xEDI
#define GETEDH(hint) if((nextop&0xC0)==0xC0) { \
ed = xEAX+(nextop&7); \
wback = 0; \
} else { \
addr = geted(dyn, addr, ninst, nextop, &wback, (hint==x2)?x1:x2, &fixedaddress, 4095, 0); \
LDR_IMM9(hint, wback, fixedaddress); \
ed = hint; \
}
//GETEDW can use hint for wback and ret for ed. wback is 0 if ed is xEAX..xEDI
#define GETEDW(hint, ret) if((nextop&0xC0)==0xC0) { \
ed = xEAX+(nextop&7); \
MOV_REG(ret, ed); \
wback = 0; \
} else { \
addr = geted(dyn, addr, ninst, nextop, &wback, hint, &fixedaddress, 4095, 0); \
ed = ret; \
LDR_IMM9(ed, wback, fixedaddress); \
}
// Write back ed in wback (if wback not 0)
#define WBACK if(wback) {STR_IMM9(ed, wback, fixedaddress);}
// Send back wb to either ed or wback
#define SBACK(wb) if(wback) {STR_IMM9(wb, wback, fixedaddress);} else {MOV_REG(ed, wb);}
//GETEDO can use r1 for ed, and r2 for wback. wback is 0 if ed is xEAX..xEDI
#define GETEDO(O) if((nextop&0xC0)==0xC0) { \
ed = xEAX+(nextop&7); \
wback = 0; \
} else { \
addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0); \
LDR_REG_LSL_IMM5(x1, wback, O, 0); \
ed = x1; \
}
#define WBACKO(O) if(wback) {STR_REG_LSL_IMM5(ed, wback, O, 0);}
//FAKEED is like GETED, but doesn't fetch anything
#define FAKEED if((nextop&0xC0)!=0xC0) { \
addr = fakeed(dyn, addr, ninst, nextop); \
}
// GETGW extract x64 register in gd, that is i
#define GETGW(i) gd = xEAX+((nextop&0x38)>>3); UXTH(i, gd, 0); gd = i;
//GETEWW will use i for ed, and can use w for wback.
#define GETEWW(w, i) if((nextop&0xC0)==0xC0) { \
wback = xEAX+(nextop&7);\
UXTH(i, wback, 0); \
ed = i; \
wb1 = 0; \
} else { \
addr = geted(dyn, addr, ninst, nextop, &wback, w, &fixedaddress, 255, 0); \
LDRH_IMM8(i, wback, fixedaddress); \
ed = i; \
wb1 = 1; \
}
//GETEW will use i for ed, and can use r3 for wback.
#define GETEW(i) if((nextop&0xC0)==0xC0) { \
wback = xEAX+(nextop&7);\
UXTH(i, wback, 0); \
ed = i; \
wb1 = 0; \
} else { \
addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 255, 0); \
LDRH_IMM8(i, wback, fixedaddress); \
ed = i; \
wb1 = 1; \
}
//GETSEW will use i for ed, and can use r3 for wback. This is the Signed version
#define GETSEW(i) if((nextop&0xC0)==0xC0) { \
wback = xEAX+(nextop&7);\
SXTH(i, wback, 0); \
ed = i; \
wb1 = 0; \
} else { \
addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 255, 0); \
LDRSH_IMM8(i, wback, fixedaddress);\
ed = i; \
wb1 = 1; \
}
// Write ed back to original register / memory
#define EWBACK if(wb1) {STRH_IMM8(ed, wback, fixedaddress);} else {BFI(wback, ed, 0, 16);}
// Write w back to original register / memory
#define EWBACKW(w) if(wb1) {STRH_IMM8(w, wback, fixedaddress);} else {BFI(wback, w, 0, 16);}
// Write back gd in correct register
#define GWBACK BFI((xEAX+((nextop&0x38)>>3)), gd, 0, 16);
//GETEB will use i for ed, and can use r3 for wback.
#define GETEB(i) if((nextop&0xC0)==0xC0) { \
wback = (nextop&7); \
wb2 = (wback>>2); \
wback = xEAX+(wback&3); \
UXTB(i, wback, wb2); \
wb1 = 0; \
ed = i; \
} else { \
addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 4095, 0); \
LDRB_IMM9(i, wback, fixedaddress); \
wb1 = 1; \
ed = i; \
}
//GETEBO will use i for ed, i is also Offset, and can use r3 for wback.
#define GETEBO(i) if((nextop&0xC0)==0xC0) { \
wback = (nextop&7); \
wb2 = (wback>>2); \
wback = xEAX+(wback&3); \
UXTB(i, wback, wb2); \
wb1 = 0; \
ed = i; \
} else { \
addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0); \
ADD_REG_LSL_IMM5(wback, wback, i, 0); \
LDRB_IMM9(i, wback, fixedaddress); \
wb1 = 1; \
ed = i; \
}
//GETSEB sign extend EB, will use i for ed, and can use r3 for wback.
#define GETSEB(i) if((nextop&0xC0)==0xC0) { \
wback = (nextop&7); \
wb2 = (wback>>2); \
wback = xEAX+(wback&3); \
SXTB(i, wback, wb2); \
wb1 = 0; \
ed = i; \
} else { \
addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 255, 0); \
LDRSB_IMM8(i, wback, fixedaddress);\
wb1 = 1; \
ed = i; \
}
// Write eb (ed) back to original register / memory
#define EBBACK if(wb1) {STRB_IMM9(ed, wback, fixedaddress);} else {BFI(wback, ed, wb2*8, 8);}
//GETGB will use i for gd
#define GETGB(i) gd = (nextop&0x38)>>3; \
gb2 = ((gd&4)>>2); \
gb1 = xEAX+(gd&3); \
gd = i; \
UXTB(gd, gb1, gb2);
//GETSGB sign extend GB, will use i for gd
#define GETSGB(i) gd = (nextop&0x38)>>3; \
gb2 = ((gd&4)>>2); \
gb1 = xEAX+(gd&3); \
gd = i; \
SXTB(gd, gb1, gb2);
// Write gb (gd) back to original register / memory
#define GBBACK BFI(gb1, gd, gb2*8, 8);
// Get Direction with size Z and based of F_DF flag, on register r ready for LDR/STR fetching
// F_DF is 1<<10, so 1 ROR 11*2 (so F_OF)
#define GETDIR(r, A) \
TSTS_IMM8_ROR(xFlags, 1, 0x0b); \
MOVW(r, A); \
RSB_COND_IMM8(cNE, r, r, 0)
// CALL will use x7 for the call address. Return value can be put in ret (unless ret is -1)
// R0 will not be pushed/popped if ret is -2
#define CALL(F, ret) call_c(dyn, ninst, F, x7, ret, 1)
// CALL_ will use x3 for the call address. Return value can be put in ret (unless ret is -1)
// R0 will not be pushed/popped if ret is -2
#define CALL_(F, ret) call_c(dyn, ninst, F, x3, ret, 1)
// CALL_S will use x3 for the call address. Return value can be put in ret (unless ret is -1)
// R0 will not be pushed/popped if ret is -2. Flags are not saved/restored
#define CALL_S(F, ret) call_c(dyn, ninst, F, x3, ret, 0)
#define MARK if(dyn->insts) {dyn->insts[ninst].mark = (uintptr_t)dyn->arm_size;}
#define GETMARK ((dyn->insts)?dyn->insts[ninst].mark:(dyn->arm_size+4))
#define MARK2 if(dyn->insts) {dyn->insts[ninst].mark2 = (uintptr_t)dyn->arm_size;}
#define GETMARK2 ((dyn->insts)?dyn->insts[ninst].mark2:(dyn->arm_size+4))
#define MARK3 if(dyn->insts) {dyn->insts[ninst].mark3 = (uintptr_t)dyn->arm_size;}
#define GETMARK3 ((dyn->insts)?dyn->insts[ninst].mark3:(dyn->arm_size+4))
#define MARKF if(dyn->insts) {dyn->insts[ninst].markf = (uintptr_t)dyn->arm_size;}
#define GETMARKF ((dyn->insts)?dyn->insts[ninst].markf:(dyn->arm_size+4))
#define MARKSEG if(dyn->insts) {dyn->insts[ninst].markseg = (uintptr_t)dyn->arm_size;}
#define GETMARKSEG ((dyn->insts)?dyn->insts[ninst].markseg:(dyn->arm_size+4))
#define MARKLOCK if(dyn->insts) {dyn->insts[ninst].marklock = (uintptr_t)dyn->arm_size;}
#define GETMARKLOCK ((dyn->insts)?dyn->insts[ninst].marklock:(dyn->arm_size+4))
// Branch to MARK if cond (use j32)
#define B_MARK(cond) \
j32 = GETMARK-(dyn->arm_size+8); \
Bcond(cond, j32)
// Branch to MARK2 if cond (use j32)
#define B_MARK2(cond) \
j32 = GETMARK2-(dyn->arm_size+8); \
Bcond(cond, j32)
// Branch to MARK3 if cond (use j32)
#define B_MARK3(cond) \
j32 = GETMARK3-(dyn->arm_size+8); \
Bcond(cond, j32)
// Branch to next instruction if cond (use j32)
#define B_NEXT(cond) \
j32 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->arm_size+8)):0; \
Bcond(cond, j32)
// Branch to MARKSEG if cond (use j32)
#define B_MARKSEG(cond) \
j32 = GETMARKSEG-(dyn->arm_size+8); \
Bcond(cond, j32)
// Branch to MARKSEG if reg is 0 (use j32)
#define CBZx_MARKSEG(reg) \
j32 = GETMARKSEG-(dyn->arm_size+8); \
CBZx(reg, j32)
// Branch to MARKSEG if reg is 0 (use j32)
#define CBZw_MARKSEG(reg) \
j32 = GETMARKSEG-(dyn->arm_size+8); \
CBZw(reg, j32)
// Branch to MARKLOCK if cond (use j32)
#define B_MARKLOCK(cond) \
j32 = GETMARKLOCK-(dyn->arm_size+8); \
Bcond(cond, j32)
#define IFX(A) if(dyn->insts && (dyn->insts[ninst].x64.need_flags&(A)))
#define IFXX(A) if(dyn->insts && (dyn->insts[ninst].x64.need_flags==(A)))
#define IFX2X(A, B) if(dyn->insts && (dyn->insts[ninst].x64.need_flags==(A) || dyn->insts[ninst].x64.need_flags==(B) || dyn->insts[ninst].x64.need_flags==((A)|(B))))
#define IFXN(A, B) if(dyn->insts && (dyn->insts[ninst].x64.need_flags&(A) && !(dyn->insts[ninst].x64.need_flags&(B))))
// Generate FCOM with s1 and s2 scratch regs (the VCMP is already done)
#define FCOM(s1, s2) \
VMRS_APSR(); /* 0b0100011100000000 */ \
LDRH_IMM8(s1, xEmu, offsetof(x64emu_t, sw)); /*offset is 8bits right?*/ \
BIC_IMM8(s1, s1, 0b01000111, 12); \
ORR_IMM8_COND(cVS, s1, s1, 0b01000101, 12); /* unordered */ \
ORR_IMM8_COND(cEQ, s1, s1, 0b01000000, 12); /* equal */ \
ORR_IMM8_COND(cMI, s1, s1, 0b00000001, 12); /* less than */ \
/* greater than leave 0 */ \
STRH_IMM8(s1, xEmu, offsetof(x64emu_t, sw))
// Generate FCOMI with s1 and s2 scratch regs (the VCMP is already done)
#define FCOMI(s1, s2) \
IFX(X_CF|X_PF|X_ZF|X_PEND) { \
VMRS_APSR(); /* 0b111 */ \
BIC_IMM8(xFlags, xFlags, 0b1000101, 0); \
ORR_IMM8_COND(cVS, xFlags, xFlags, 0b01000101, 0); /* unordered */ \
ORR_IMM8_COND(cEQ, xFlags, xFlags, 0b01000000, 0); /* zero */ \
ORR_IMM8_COND(cMI, xFlags, xFlags, 0b00000001, 0); /* less than */ \
/* greater than leave 0 */ \
} \
SET_DFNONE(s1); \
IFX(X_OF|X_PEND) { \
BFC(xFlags, F_OF, 1); \
} \
IFX(X_AF|X_PEND) { \
BFC(xFlags, F_AF, 1); \
} \
IFX(X_SF|X_PEND) { \
BFC(xFlags, F_SF, 1); \
}
#define STORE_REG(A) STRx_U12(x##A, xEmu, offsetof(x64emu_t, regs[_##A]))
#define STORE_XEMU_REGS(A) \
STORE_REG(RAX); \
STORE_REG(RCX); \
STORE_REG(RDX); \
STORE_REG(RBX); \
STORE_REG(RSP); \
STORE_REG(RBP); \
STORE_REG(RSI); \
STORE_REG(RDI); \
STORE_REG(R8); \
STORE_REG(R9); \
STORE_REG(R10); \
STORE_REG(R11); \
STORE_REG(R12); \
STORE_REG(R13); \
STORE_REG(R14); \
STORE_REG(R15); \
STRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags)); \
if(A) {STRx_U12(A, xEmu, offsetof(x64emu_t, ip));}
#define SET_DFNONE(S) if(!dyn->dfnone) {MOVZw(S, d_none); STRw_U12(S, xEmu, offsetof(x64emu_t, df)); dyn->dfnone=1;}
#define SET_DF(S, N) if(N) {MOVZw(S, N); STRw_U12(S, xEmu, offsetof(x64emu_t, df)); dyn->dfnone=0;} else SET_DFNONE(S)
#define SET_NODF() dyn->dfnone = 0
#define SET_DFOK() dyn->dfnone = 1
#ifndef READFLAGS
#define READFLAGS(A) \
if(((A)!=X_PEND) && dyn->state_flags!=SF_SET) { \
if(dyn->state_flags!=SF_PENDING) { \
LDR_IMM9(x3, xEmu, offsetof(x64emu_t, df)); \
TSTS_REG_LSL_IMM5(x3, x3, 0); \
j32 = (GETMARKF)-(dyn->arm_size+8); \
Bcond(cEQ, j32); \
} \
CALL_(UpdateFlags, -1); \
MARKF; \
dyn->state_flags = SF_SET; \
SET_DFOK(); \
}
#endif
#ifndef SETFLAGS
#define SETFLAGS(A, B) \
if(dyn->state_flags!=SF_SET && B==SF_SUBSET && (dyn->insts[ninst].x64.need_flags&(~((A)|X_PEND)))) \
READFLAGS(dyn->insts[ninst].x64.need_flags&(~(A))); \
dyn->state_flags = (B==SF_SUBSET)?SF_SET:B
#endif
#ifndef JUMP
#define JUMP(A)
#endif
#ifndef BARRIER
#define BARRIER(A)
#endif
#ifndef BARRIER_NEXT
#define BARRIER_NEXT(A)
#endif
#define UFLAG_OP1(A) if(dyn->insts && dyn->insts[ninst].x64.need_flags) {STR_IMM9(A, xEmu, offsetof(x64emu_t, op1));}
#define UFLAG_OP2(A) if(dyn->insts && dyn->insts[ninst].x64.need_flags) {STR_IMM9(A, xEmu, offsetof(x64emu_t, op2));}
#define UFLAG_OP12(A1, A2) if(dyn->insts && dyn->insts[ninst].x64.need_flags) {STR_IMM9(A1, xEmu, offsetof(x64emu_t, op1));STR_IMM9(A2, xEmu, offsetof(x64emu_t, op2));}
#define UFLAG_RES(A) if(dyn->insts && dyn->insts[ninst].x64.need_flags) {STR_IMM9(A, xEmu, offsetof(x64emu_t, res));}
#define UFLAG_DF(r, A) if(dyn->insts && dyn->insts[ninst].x64.need_flags) {SET_DF(r, A)}
#define UFLAG_IF if(dyn->insts && dyn->insts[ninst].x64.need_flags)
#ifndef DEFAULT
#define DEFAULT *ok = -1; BARRIER(2)
#endif
#ifndef NEW_BARRIER_INST
#define NEW_BARRIER_INST
#endif
#if STEP < 2
#define PASS2IF(A, B) if(A)
#elif STEP == 2
#define PASS2IF(A, B) if(A) dyn->insts[ninst].pass2choice = B; if(dyn->insts[ninst].pass2choice == B)
#else
#define PASS2IF(A, B) if(dyn->insts[ninst].pass2choice == B)
#endif
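// Illustrative sketch (assumption): PASS2IF lets pass 2 record which code shape
// was selected (pass2choice) so that pass 3 replays exactly the same branch and
// the sizes computed in pass 2 stay valid:
#if 0
    PASS2IF(dyn->insts && dyn->insts[ninst].x64.jmp, 1) {
        // variant 1: chosen and recorded during pass 2
    } else {
        // variant 2: replayed identically during pass 3
    }
#endif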
void arm64_epilog();
void* arm64_next(x64emu_t* emu, uintptr_t addr);
#ifndef STEPNAME
#define STEPNAME3(N,M) N##M
#define STEPNAME2(N,M) STEPNAME3(N,M)
#define STEPNAME(N) STEPNAME2(N, STEP)
#endif
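// e.g. with -DSTEP=2, arm_pass expands as STEPNAME(arm_pass) -> STEPNAME2(arm_pass, 2)
// -> STEPNAME3(arm_pass, 2) -> arm_pass2, so compiling the pass sources four times
// (STEP=0..3) yields one independent set of symbols per pass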
#define arm_pass STEPNAME(arm_pass)
#define dynarec64_00 STEPNAME(dynarec64_00)
#define dynarec64_0F STEPNAME(dynarec64_0F)
#define dynarec64_FS STEPNAME(dynarec64_FS)
#define dynarec64_GS STEPNAME(dynarec64_GS)
#define dynarec64_66 STEPNAME(dynarec64_66)
#define dynarec64_67 STEPNAME(dynarec64_67)
#define dynarec64_D8 STEPNAME(dynarec64_D8)
#define dynarec64_D9 STEPNAME(dynarec64_D9)
#define dynarec64_DA STEPNAME(dynarec64_DA)
#define dynarec64_DB STEPNAME(dynarec64_DB)
#define dynarec64_DC STEPNAME(dynarec64_DC)
#define dynarec64_DD STEPNAME(dynarec64_DD)
#define dynarec64_DE STEPNAME(dynarec64_DE)
#define dynarec64_DF STEPNAME(dynarec64_DF)
#define dynarec64_F0 STEPNAME(dynarec64_F0)
#define dynarec64_660F STEPNAME(dynarec64_660F)
#define dynarec64_F20F STEPNAME(dynarec64_F20F)
#define dynarec64_F30F STEPNAME(dynarec64_F30F)
#define geted STEPNAME(geted_)
#define geted16 STEPNAME(geted16_)
#define jump_to_epilog STEPNAME(jump_to_epilog_)
#define jump_to_next STEPNAME(jump_to_next_)
#define ret_to_epilog STEPNAME(ret_to_epilog_)
#define retn_to_epilog STEPNAME(retn_to_epilog_)
#define iret_to_epilog STEPNAME(iret_to_epilog_)
#define call_c STEPNAME(call_c_)
#define grab_fsdata STEPNAME(grab_fsdata_)
#define grab_tlsdata STEPNAME(grab_tlsdata_)
#define emit_cmp8 STEPNAME(emit_cmp8)
#define emit_cmp16 STEPNAME(emit_cmp16)
#define emit_cmp32 STEPNAME(emit_cmp32)
#define emit_cmp8_0 STEPNAME(emit_cmp8_0)
#define emit_cmp16_0 STEPNAME(emit_cmp16_0)
#define emit_cmp32_0 STEPNAME(emit_cmp32_0)
#define emit_test8 STEPNAME(emit_test8)
#define emit_test16 STEPNAME(emit_test16)
#define emit_test32 STEPNAME(emit_test32)
#define emit_add32 STEPNAME(emit_add32)
#define emit_add32c STEPNAME(emit_add32c)
#define emit_add8 STEPNAME(emit_add8)
#define emit_add8c STEPNAME(emit_add8c)
#define emit_sub32 STEPNAME(emit_sub32)
#define emit_sub32c STEPNAME(emit_sub32c)
#define emit_sub8 STEPNAME(emit_sub8)
#define emit_sub8c STEPNAME(emit_sub8c)
#define emit_or32 STEPNAME(emit_or32)
#define emit_or32c STEPNAME(emit_or32c)
#define emit_xor32 STEPNAME(emit_xor32)
#define emit_xor32c STEPNAME(emit_xor32c)
#define emit_and32 STEPNAME(emit_and32)
#define emit_and32c STEPNAME(emit_and32c)
#define emit_or8 STEPNAME(emit_or8)
#define emit_or8c STEPNAME(emit_or8c)
#define emit_xor8 STEPNAME(emit_xor8)
#define emit_xor8c STEPNAME(emit_xor8c)
#define emit_and8 STEPNAME(emit_and8)
#define emit_and8c STEPNAME(emit_and8c)
#define emit_add16 STEPNAME(emit_add16)
#define emit_add16c STEPNAME(emit_add16c)
#define emit_sub16 STEPNAME(emit_sub16)
#define emit_sub16c STEPNAME(emit_sub16c)
#define emit_or16 STEPNAME(emit_or16)
#define emit_or16c STEPNAME(emit_or16c)
#define emit_xor16 STEPNAME(emit_xor16)
#define emit_xor16c STEPNAME(emit_xor16c)
#define emit_and16 STEPNAME(emit_and16)
#define emit_and16c STEPNAME(emit_and16c)
#define emit_inc32 STEPNAME(emit_inc32)
#define emit_inc16 STEPNAME(emit_inc16)
#define emit_inc8 STEPNAME(emit_inc8)
#define emit_dec32 STEPNAME(emit_dec32)
#define emit_dec16 STEPNAME(emit_dec16)
#define emit_dec8 STEPNAME(emit_dec8)
#define emit_adc32 STEPNAME(emit_adc32)
#define emit_adc32c STEPNAME(emit_adc32c)
#define emit_adc8 STEPNAME(emit_adc8)
#define emit_adc8c STEPNAME(emit_adc8c)
#define emit_adc16 STEPNAME(emit_adc16)
#define emit_adc16c STEPNAME(emit_adc16c)
#define emit_sbb32 STEPNAME(emit_sbb32)
#define emit_sbb32c STEPNAME(emit_sbb32c)
#define emit_sbb8 STEPNAME(emit_sbb8)
#define emit_sbb8c STEPNAME(emit_sbb8c)
#define emit_sbb16 STEPNAME(emit_sbb16)
#define emit_sbb16c STEPNAME(emit_sbb16c)
#define emit_neg32 STEPNAME(emit_neg32)
#define emit_neg16 STEPNAME(emit_neg16)
#define emit_neg8 STEPNAME(emit_neg8)
#define emit_shl32 STEPNAME(emit_shl32)
#define emit_shl32c STEPNAME(emit_shl32c)
#define emit_shr32 STEPNAME(emit_shr32)
#define emit_shr32c STEPNAME(emit_shr32c)
#define emit_sar32c STEPNAME(emit_sar32c)
#define emit_rol32c STEPNAME(emit_rol32c)
#define emit_ror32c STEPNAME(emit_ror32c)
#define emit_shrd32c STEPNAME(emit_shrd32c)
#define emit_shld32c STEPNAME(emit_shld32c)
#define emit_pf STEPNAME(emit_pf)
#define x87_do_push STEPNAME(x87_do_push)
#define x87_do_push_empty STEPNAME(x87_do_push_empty)
#define x87_do_pop STEPNAME(x87_do_pop)
#define x87_get_cache STEPNAME(x87_get_cache)
#define x87_get_st STEPNAME(x87_get_st)
#define x87_refresh STEPNAME(x87_refresh)
#define x87_forget STEPNAME(x87_forget)
#define x87_reget_st STEPNAME(x87_reget_st)
#define x87_stackcount STEPNAME(x87_stackcount)
#define x87_setround STEPNAME(x87_setround)
#define x87_restoreround STEPNAME(x87_restoreround)
#define sse_setround STEPNAME(sse_setround)
#define mmx_get_reg STEPNAME(mmx_get_reg)
#define mmx_get_reg_empty STEPNAME(mmx_get_reg_empty)
#define sse_get_reg STEPNAME(sse_get_reg)
#define sse_get_reg_empty STEPNAME(sse_get_reg_empty)
#define fpu_pushcache STEPNAME(fpu_pushcache)
#define fpu_popcache STEPNAME(fpu_popcache)
#define fpu_reset STEPNAME(fpu_reset)
#define fpu_purgecache STEPNAME(fpu_purgecache)
#ifdef HAVE_TRACE
#define fpu_reflectcache STEPNAME(fpu_reflectcache)
#endif
/* setup ed (and fixaddress) to the register or address encoded by nextop (ModRM) */
uintptr_t geted(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, int* fixaddress, int absmax, uint32_t mask, rex_t rex, int s, int delta);
/* same, for 16-bit addressing modes (0x67 prefix) */
uintptr_t geted16(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, int* fixaddress, int absmax, uint32_t mask, int s);
// generic x64 helper
void jump_to_epilog(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst);
void jump_to_next(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst);
void ret_to_epilog(dynarec_arm_t* dyn, int ninst);
void retn_to_epilog(dynarec_arm_t* dyn, int ninst, int n);
void iret_to_epilog(dynarec_arm_t* dyn, int ninst);
void call_c(dynarec_arm_t* dyn, int ninst, void* fnc, int reg, int ret, int saveflags);
//void grab_fsdata(dynarec_arm_t* dyn, uintptr_t addr, int ninst, int reg);
//void grab_tlsdata(dynarec_arm_t* dyn, uintptr_t addr, int ninst, int reg);
//void emit_cmp8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4);
//void emit_cmp16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4);
//void emit_cmp32(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4);
//void emit_cmp8_0(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4);
//void emit_cmp16_0(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4);
//void emit_cmp32_0(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4);
//void emit_test8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4);
//void emit_test16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4);
//void emit_test32(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4);
//void emit_add32(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4);
//void emit_add32c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
//void emit_add8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int save_s4);
//void emit_add8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
//void emit_sub32(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4);
//void emit_sub32c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
//void emit_sub8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int save_s4);
//void emit_sub8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
//void emit_or32(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4);
//void emit_or32c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
//void emit_xor32(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4);
//void emit_xor32c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
//void emit_and32(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4);
//void emit_and32c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
//void emit_or8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4);
//void emit_or8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
//void emit_xor8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4);
//void emit_xor8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
//void emit_and8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4);
//void emit_and8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
//void emit_add16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int save_s4);
//void emit_add16c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
//void emit_sub16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int save_s4);
//void emit_sub16c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
//void emit_or16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4);
//void emit_or16c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
//void emit_xor16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4);
//void emit_xor16c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
//void emit_and16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4);
//void emit_and16c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
//void emit_inc32(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4);
//void emit_inc16(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4);
//void emit_inc8(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4);
//void emit_dec32(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4);
//void emit_dec16(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4);
//void emit_dec8(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4);
//void emit_adc32(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4);
//void emit_adc32c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
//void emit_adc8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int save_s4);
//void emit_adc8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
//void emit_adc16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int save_s4);
//void emit_adc16c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
//void emit_sbb32(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4);
//void emit_sbb32c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
//void emit_sbb8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int save_s4);
//void emit_sbb8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
//void emit_sbb16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int save_s4);
//void emit_sbb16c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
//void emit_neg32(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4);
//void emit_neg16(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4);
//void emit_neg8(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4);
//void emit_shl32(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4);
//void emit_shl32c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
//void emit_shr32(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4);
//void emit_shr32c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
//void emit_sar32c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
//void emit_rol32c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
//void emit_ror32c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
//void emit_shrd32c(dynarec_arm_t* dyn, int ninst, int s1, int s2, int32_t c, int s3, int s4);
//void emit_shld32c(dynarec_arm_t* dyn, int ninst, int s1, int s2, int32_t c, int s3, int s4);
void emit_pf(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4);
// x87 helper
// cache of the local x87 stack counter, to avoid updating it at every call
void x87_stackcount(dynarec_arm_t* dyn, int ninst, int scratch);
// fpu push. Return the Dd value to be used
int x87_do_push(dynarec_arm_t* dyn, int ninst);
// fpu push. Does not allocate a cache register. Needs a scratch register to sync the x87 stack (or 0 to skip the sync)
void x87_do_push_empty(dynarec_arm_t* dyn, int ninst, int s1);
// fpu pop. All previous returned Dd should be considered invalid
void x87_do_pop(dynarec_arm_t* dyn, int ninst);
// get the cache index for an x87 reg, creating the entry if needed
int x87_get_cache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int a);
// get the native fpu register for an x87 reg, creating the entry if needed
int x87_get_st(dynarec_arm_t* dyn, int ninst, int s1, int s2, int a);
// refresh a value from the cache ->emu (nothing done if value is not cached)
void x87_refresh(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st);
// refresh a value from the cache ->emu and then forget the cache (nothing done if value is not cached)
void x87_forget(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st);
// refresh the cache value from emu
void x87_reget_st(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st);
// Set rounding according to cw flags, return reg to restore flags
int x87_setround(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3);
// Restore round flag
void x87_restoreround(dynarec_arm_t* dyn, int ninst, int s1);
// Set rounding according to mxcsr flags, return reg to restore flags
int sse_setround(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3);
//MMX helpers
// get the neon register for an MMX reg, creating the entry if needed
int mmx_get_reg(dynarec_arm_t* dyn, int ninst, int s1, int a);
// get the neon register for an MMX reg, but don't sync it from the emu if the entry has to be created
int mmx_get_reg_empty(dynarec_arm_t* dyn, int ninst, int s1, int a);
//SSE/SSE2 helpers
// get the neon register for an SSE reg, creating the entry if needed
int sse_get_reg(dynarec_arm_t* dyn, int ninst, int s1, int a);
// get the neon register for an SSE reg, but don't sync it from the emu if the entry has to be created
int sse_get_reg_empty(dynarec_arm_t* dyn, int ninst, int s1, int a);
// common coproc helpers
// reset the cache
void fpu_reset(dynarec_arm_t* dyn, int ninst);
// purge the FPU cache (needs 3 scratch registers)
void fpu_purgecache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3);
#ifdef HAVE_TRACE
void fpu_reflectcache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3);
#endif
void fpu_pushcache(dynarec_arm_t* dyn, int ninst, int s1);
void fpu_popcache(dynarec_arm_t* dyn, int ninst, int s1);
uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, int* ok, int* need_epilog);
//uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, int* ok, int* need_epilog);
//uintptr_t dynarec64_FS(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, int* ok, int* need_epilog);
//uintptr_t dynarec64_GS(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, int* ok, int* need_epilog);
//uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, int* ok, int* need_epilog);
//uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, int* ok, int* need_epilog);
//uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, int* ok, int* need_epilog);
//uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, int* ok, int* need_epilog);
//uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, int* ok, int* need_epilog);
//uintptr_t dynarec64_DB(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, int* ok, int* need_epilog);
//uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, int* ok, int* need_epilog);
//uintptr_t dynarec64_DD(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, int* ok, int* need_epilog);
//uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, int* ok, int* need_epilog);
//uintptr_t dynarec64_DF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, int* ok, int* need_epilog);
//uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, int* ok, int* need_epilog);
//uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, int* ok, int* need_epilog);
//uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, int* ok, int* need_epilog);
//uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, int* ok, int* need_epilog);
#if STEP < 2
#define PASS2(A)
#else
#define PASS2(A) A
#endif
#if STEP < 3
#define PASS3(A)
#else
#define PASS3(A) A
#endif
#if STEP < 3
#define MAYUSE(A) (void)A
#else
#define MAYUSE(A)
#endif
#endif //__DYNAREC_ARM64_HELPER_H__

src/dynarec/dynarec_arm64_pass.c Executable file
View File

@ -0,0 +1,100 @@
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <pthread.h>
#include <errno.h>
#include "debug.h"
#include "box64context.h"
#include "dynarec.h"
#include "emu/x64emu_private.h"
#include "emu/x64run_private.h"
#include "x64run.h"
#include "x64emu.h"
#include "box64stack.h"
#include "emu/x64run_private.h"
#include "x64trace.h"
#include "dynarec_arm64.h"
#include "dynarec_arm64_private.h"
#include "arm64_printer.h"
#include "dynarec_arm64_functions.h"
#include "dynarec_arm64_helper.h"
#include "custommem.h"
#ifndef STEP
#error No STEP defined
#endif
void arm_pass(dynarec_arm_t* dyn, uintptr_t addr)
{
int ok = 1;
int ninst = 0;
uintptr_t ip = addr;
int need_epilog = 1;
dyn->sons_size = 0;
// Clean up (because there are multiple passes)
dyn->state_flags = 0;
dyn->dfnone = 0;
fpu_reset(dyn, ninst);
// ok, go now
INIT;
while(ok) {
if(dyn->insts && (ninst>dyn->size)) {dynarec_log(LOG_NONE, "Warning, too many instructions treated (%d / %d)\n",ninst, dyn->size);}
ip = addr;
if(dyn->insts && (dyn->insts[ninst].x64.barrier==1)) {
NEW_BARRIER_INST;
}
NEW_INST;
fpu_reset_scratch(dyn);
#ifdef HAVE_TRACE
if(my_context->dec && box64_dynarec_trace) {
if((trace_end == 0)
|| ((ip >= trace_start) && (ip < trace_end))) {
MESSAGE(LOG_DUMP, "TRACE ----\n");
fpu_reflectcache(dyn, ninst, x1, x2, x3);
MOV64x(x1, ip);
STORE_XEMU_REGS(x1);
MOVw(x2, 1);
CALL(PrintTrace, -1);
MESSAGE(LOG_DUMP, "----------\n");
}
}
#endif
addr = dynarec64_00(dyn, addr, ip, ninst, &ok, &need_epilog);
INST_EPILOG;
if(dyn->insts && dyn->insts[ninst+1].x64.barrier) {
fpu_purgecache(dyn, ninst, x1, x2, x3);
if(dyn->insts[ninst+1].x64.barrier!=2) {
dyn->state_flags = 0;
dyn->dfnone = 0;
}
}
if(!ok && !need_epilog && dyn->insts && (addr < (dyn->start+dyn->isize))) {
ok = 1;
}
if(!ok && !need_epilog && !dyn->insts && getProtection(addr+3))
if(*(uint32_t*)addr!=0) { // check if we need to continue (but if the next 4 bytes are 0, stop)
uintptr_t next = get_closest_next(dyn, addr);
if(next && (
(((next-addr)<15) && is_nops(dyn, addr, next-addr))
||(((next-addr)<30) && is_instructions(dyn, addr, next-addr)) ))
{
dynarec_log(LOG_DEBUG, "Extend block %p, %p -> %p (ninst=%d)\n", dyn, (void*)addr, (void*)next, ninst);
ok = 1;
} else if(next && (next-addr)<30) {
dynarec_log(LOG_DEBUG, "Cannot extend block %p -> %p (%02X %02X %02X %02X %02X %02X %02X %02x)\n", (void*)addr, (void*)next, PK(0), PK(1), PK(2), PK(3), PK(4), PK(5), PK(6), PK(7));
}
}
if(ok<0) {ok = 0; need_epilog=1;}
++ninst;
}
if(need_epilog) {
fpu_purgecache(dyn, ninst, x1, x2, x3);
jump_to_epilog(dyn, ip, 0, ninst); // no linker here, it's an unknown instruction
}
FINI;
MESSAGE(LOG_DUMP, "---- END OF BLOCK ---- (%d, %d sons)\n", dyn->size, dyn->sons_size);
}
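// Sketch of the expected four-pass driver (assumption: the entry-point name and
// allocation details below are illustrative; arm_pass compiles to arm_pass0..arm_pass3
// via STEPNAME when this file is built with -DSTEP=0..3):
#if 0
static void fill_block(dynarec_arm_t* dyn, uintptr_t addr)
{
    arm_pass0(dyn, addr);   // STEP=0: count x64 instructions, measure the block
    // allocate dyn->insts[dyn->size] here
    arm_pass1(dyn, addr);   // STEP=1: record per-inst addresses, flags and jumps
    arm_pass2(dyn, addr);   // STEP=2: compute native sizes and addresses
    // allocate dyn->block (dyn->arm_size bytes) here
    arm_pass3(dyn, addr);   // STEP=3: emit the actual ARM64 opcodes
}
#endif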

View File

@ -0,0 +1,25 @@
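// (sizing pass, STEP=0: NEW_INST only counts x64 instructions and FINI records the total x64 block length — dyn->insts does not exist yet)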
#define INIT uintptr_t sav_addr=addr
#define FINI dyn->isize = addr-sav_addr
#define MESSAGE(A, ...)
#define SETFLAGS(A, B)
#define READFLAGS(A)
#define EMIT(A)
#define JUMP(A) add_next(dyn, (uintptr_t)A)
#define NEW_INST ++dyn->size
#define INST_EPILOG
#define INST_NAME(name)
#define DEFAULT \
--dyn->size; \
*ok = -1; \
if(box64_dynarec_log>=LOG_INFO) {\
dynarec_log(LOG_NONE, "%p: Dynarec stopped because of Opcode %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X", \
(void*)ip, PKip(0), \
PKip(1), PKip(2), PKip(3), \
PKip(4), PKip(5), PKip(6), \
PKip(7), PKip(8), PKip(9), \
PKip(10),PKip(11),PKip(12), \
PKip(13),PKip(14)); \
printFunctionAddr(ip, " => "); \
dynarec_log(LOG_NONE, "\n"); \
}

View File

@ -0,0 +1,17 @@
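// (analysis pass, STEP=1: records per-instruction x64 addresses and sizes, flag usage, jumps and barriers into dyn->insts)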
#define INIT
#define FINI \
dyn->insts[ninst].x64.addr = addr; \
if(ninst) dyn->insts[ninst-1].x64.size = dyn->insts[ninst].x64.addr - dyn->insts[ninst-1].x64.addr;
#define MESSAGE(A, ...)
#define EMIT(A)
#define READFLAGS(A) dyn->insts[ninst].x64.use_flags = A
#define SETFLAGS(A,B) {dyn->insts[ninst].x64.set_flags = A; dyn->insts[ninst].x64.state_flags = B;}
#define JUMP(A) dyn->insts[ninst].x64.jmp = A
#define BARRIER(A) dyn->insts[ninst].x64.barrier = A
#define BARRIER_NEXT(A) if(ninst<dyn->size) dyn->insts[ninst+1].x64.barrier = A
#define NEW_INST \
dyn->insts[ninst].x64.addr = ip; \
if(ninst) dyn->insts[ninst-1].x64.size = dyn->insts[ninst].x64.addr - dyn->insts[ninst-1].x64.addr;
#define INST_EPILOG
#define INST_NAME(name)

View File

@ -0,0 +1,9 @@
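// (layout pass, STEP=2: EMIT only accumulates 4 bytes per opcode, computing native sizes and addresses)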
#define INIT dyn->arm_size = 0
#define FINI if(ninst) {dyn->insts[ninst].address = (dyn->insts[ninst-1].address+dyn->insts[ninst-1].size);}
#define MESSAGE(A, ...)
#define EMIT(A) dyn->insts[ninst].size+=4; dyn->arm_size+=4
#define NEW_INST if(ninst) {dyn->insts[ninst].address = (dyn->insts[ninst-1].address+dyn->insts[ninst-1].size);}
#define INST_EPILOG dyn->insts[ninst].epilog = dyn->arm_size;
#define INST_NAME(name)
#define NEW_BARRIER_INST if(ninst) ++dyn->sons_size

View File

@ -0,0 +1,34 @@
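// (emission pass, STEP=3: EMIT writes each opcode into dyn->block, optionally dumping it, and barrier instructions are collected as potential sons)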
#define INIT
#define FINI
#define EMIT(A) \
if(box64_dynarec_dump) {dynarec_log(LOG_NONE, "\t%08x\t%s\n", (uint32_t)(A), arm64_print(A));} \
*(uint32_t*)(dyn->block) = (uint32_t)(A); \
dyn->block += 4; dyn->arm_size += 4;\
dyn->insts[ninst].size2 += 4
#define MESSAGE(A, ...) if(box64_dynarec_dump) dynarec_log(LOG_NONE, __VA_ARGS__)
#define NEW_INST
#define INST_EPILOG
#define INST_NAME(name) \
if(box64_dynarec_dump) {\
printf_x64_instruction(my_context->dec, &dyn->insts[ninst].x64, name); \
dynarec_log(LOG_NONE, "%s%p: %d emited opcodes, state=%d/%d, set=%X, use=%X, need=%X%s\n", \
(box64_dynarec_dump>1)?"\e[32m":"", \
(void*)(dyn->arm_start+dyn->insts[ninst].address), \
dyn->insts[ninst].size/4, \
dyn->insts[ninst].x64.state_flags, \
dyn->state_flags, \
dyn->insts[ninst].x64.set_flags, \
dyn->insts[ninst].x64.use_flags, \
dyn->insts[ninst].x64.need_flags, \
(box64_dynarec_dump>1)?"\e[m":""); \
}
#define NEW_BARRIER_INST \
if(ninst) { \
dyn->sons_x64[dyn->sons_size] = (uintptr_t)ip; \
dyn->sons_arm[dyn->sons_size] = dyn->block; \
MESSAGE(LOG_DUMP, "----> potential Son here\n");\
++dyn->sons_size; \
}

View File

@ -34,7 +34,7 @@ typedef struct dynarec_arm_s {
int x87reg[8]; // reg used for x87cache entry
int mmxcache[8];// cache status for the 8 MMX registers
int ssecache[8];// cache status for the 8 SSE(2) registers
int fpuused[24];// all 8..31 double reg from fpu, used by x87, sse and mmx
int fpuused[24];// all 8..31 Q reg from fpu, used by x87, sse and mmx
int x87stack; // cache stack counter
int fpu_scratch;// scratch counter
int fpu_extra_qscratch; // some opcode need an extra quad scratch register

View File

@ -2,11 +2,19 @@
#define __REGS_H_
enum {
_AX, _CX, _DX, _BX,
_SP, _BP, _SI, _DI,
_RAX, _RCX, _RDX, _RBX,
_RSP, _RBP, _RSI, _RDI,
_R8, _R9,_R10,_R11,
_R12,_R13,_R14,_R15
};
#define _AX _RAX
#define _CX _RCX
#define _DX _RDX
#define _BX _RBX
#define _SP _RSP
#define _BP _RBP
#define _SI _RSI
#define _DI _RDI
enum {
_CS, _DS, _SS, _ES, _FS, _GS