new_dynarec: Initial x64 and arm64 support

This commit is contained in:
Gillou68310 2018-12-11 18:53:55 +01:00
parent df125cbed6
commit c4c63a1242
14 changed files with 11798 additions and 179 deletions

File diff suppressed because it is too large Load Diff

View File

@ -115,6 +115,12 @@
<Filter Include="device\dd">
<UniqueIdentifier>{ff19301d-1e15-4787-8e09-d608004dd009}</UniqueIdentifier>
</Filter>
<Filter Include="device\r4300\new_dynarec\x64">
<UniqueIdentifier>{23daae91-0471-4a19-a7e8-70975acb835a}</UniqueIdentifier>
</Filter>
<Filter Include="device\r4300\new_dynarec\arm64">
<UniqueIdentifier>{8f211c2b-70bb-4714-bcbb-c77abd1af54d}</UniqueIdentifier>
</Filter>
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\..\src\api\callbacks.c">
@ -402,6 +408,12 @@
<ClCompile Include="..\..\src\device\r4300\new_dynarec\recomp_dbg.c">
<Filter>device\r4300\new_dynarec</Filter>
</ClCompile>
<ClCompile Include="..\..\src\device\r4300\new_dynarec\x64\assem_x64.c">
<Filter>device\r4300\new_dynarec\x64</Filter>
</ClCompile>
<ClCompile Include="..\..\src\device\r4300\new_dynarec\arm64\assem_arm64.c">
<Filter>device\r4300\new_dynarec\arm64</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\..\src\api\callbacks.h">
@ -734,6 +746,12 @@
<ClInclude Include="..\..\subprojects\minizip\zip.h">
<Filter>subprojects\minizip</Filter>
</ClInclude>
<ClInclude Include="..\..\src\device\r4300\new_dynarec\x64\assem_x64.h">
<Filter>device\r4300\new_dynarec\x64</Filter>
</ClInclude>
<ClInclude Include="..\..\src\device\r4300\new_dynarec\arm64\assem_arm64.h">
<Filter>device\r4300\new_dynarec\arm64</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<None Include="..\..\src\device\r4300\mips_instructions.def">
@ -745,6 +763,9 @@
<None Include="..\..\src\device\r4300\new_dynarec\arm\linkage_arm.S">
<Filter>device\r4300\new_dynarec\arm</Filter>
</None>
<None Include="..\..\src\device\r4300\new_dynarec\arm64\linkage_arm64.S">
<Filter>device\r4300\new_dynarec\arm64</Filter>
</None>
</ItemGroup>
<ItemGroup>
<CustomBuild Include="..\..\src\device\r4300\new_dynarec\x86\linkage_x86.asm">
@ -756,5 +777,8 @@
<CustomBuild Include="..\..\src\device\r4300\x86\dyna_start.asm">
<Filter>device\r4300\x86</Filter>
</CustomBuild>
<CustomBuild Include="..\..\src\device\r4300\new_dynarec\x64\linkage_x64.asm">
<Filter>device\r4300\new_dynarec\x64</Filter>
</CustomBuild>
</ItemGroup>
</Project>

View File

@ -124,12 +124,10 @@ ifneq ("$(filter mips,$(HOST_CPU))","")
$(warning Architecture "$(HOST_CPU)" not officially supported.)
endif
ifneq ("$(filter aarch64,$(HOST_CPU))","")
CPU := AARCH
CPU := ARM
ARCH_DETECTED := 64BITS
PIC ?= 1
NEW_DYNAREC := 1
NO_ASM := 1
LDFLAGS += -fPIC
endif
ifeq ("$(CPU)","NONE")
$(error CPU type "$(HOST_CPU)" not supported. Please file bug report at 'https://github.com/mupen64plus/mupen64plus-core/issues')
@ -580,11 +578,14 @@ ifneq ($(NO_ASM), 1)
endif
endif
ifeq ($(CPU), ARM)
ifeq ($(ARCH_DETECTED), 32BITS)
ifeq ($(ARCH_DETECTED), 64BITS)
DYNAREC = arm64
else
DYNAREC = arm
endif
endif
endif
ifneq ($(DYNAREC), )
CFLAGS += -DDYNAREC
@ -593,15 +594,21 @@ ifneq ($(DYNAREC), )
CFLAGS += -DNEW_DYNAREC=1
SOURCE += \
$(SRCDIR)/device/r4300/new_dynarec/x86/linkage_x86.asm
else ifeq ($(DYNAREC), x86_64)
CFLAGS += -DNEW_DYNAREC=2
SOURCE += \
$(SRCDIR)/device/r4300/new_dynarec/x64/linkage_x64.asm
else ifeq ($(DYNAREC), arm)
CFLAGS += -DNEW_DYNAREC=3
SOURCE += \
$(SRCDIR)/device/r4300/new_dynarec/arm/linkage_arm.S \
$(SRCDIR)/device/r4300/new_dynarec/arm/arm_cpu_features.c
else ifeq ($(DYNAREC), arm64)
CFLAGS += -DNEW_DYNAREC=4
SOURCE += \
$(SRCDIR)/device/r4300/new_dynarec/arm64/linkage_arm64.S
else
ifeq ($(DYNAREC), arm)
CFLAGS += -DNEW_DYNAREC=3
SOURCE += \
$(SRCDIR)/device/r4300/new_dynarec/arm/linkage_arm.S \
$(SRCDIR)/device/r4300/new_dynarec/arm/arm_cpu_features.c
else
$(error NEW_DYNAREC is only supported on 32 bit x86 and 32 bit armel)
endif
$(error NEW_DYNAREC is not supported on this architecture)
endif
SOURCE += \

View File

@ -3012,6 +3012,7 @@ static int do_dirty_stub(int i)
static void do_dirty_stub_ds(void)
{
assert((int)start>=(int)0xC0000000);
// Careful about the code output here, verify_dirty needs to parse it.
#ifdef ARMv5_ONLY
emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,49 @@
/* Host-side configuration for the ARM64 (AArch64) backend of the new dynarec:
   host register numbering, register roles and feature switches. */
#ifndef M64P_DEVICE_R4300_NEW_DYNAREC_ARM_ASSEM_ARM64_H
#define M64P_DEVICE_R4300_NEW_DYNAREC_ARM_ASSEM_ARM64_H
/* Number of host register slots; host registers are numbered 0..28 (x0-x28). */
#define HOST_REGS 29
#define HOST_CCREG 20 /* callee-save; w20 carries the cycle count in linkage_arm64.S */
/* NOTE(review): presumably the branch target register - confirm against new_dynarec.c */
#define HOST_BTREG 19 /* callee-save */
/* Same value as HOST_REGS, i.e. just outside the 0..28 range. */
#define EXCLUDE_REG 29 /* FP */
/* Defined when the host has native 64-bit registers (tested with #ifdef). */
#define NATIVE_64 1
#define HOST_IMM8 1
/* Feature switches that are left disabled for this backend. */
//#define HAVE_CMOV_IMM 1
//#define CORTEX_A8_BRANCH_PREDICTION_HACK 1
//#define REG_PREFETCH 1
//#define HAVE_CONDITIONAL_CALL 1
#define RAM_OFFSET 1
/* Enables the mini hash table code paths (tested with #ifdef). */
#define USE_MINI_HT 1
//#define INTERPRETED_MULT64 1
//#define INTERPRETED_DIV64 1
/* ARM calling convention:
x0-x18: caller-save
x19-x28: callee-save */
/* Host register numbers of the first four integer call arguments (x0-x3). */
#define ARG1_REG 0
#define ARG2_REG 1
#define ARG3_REG 2
#define ARG4_REG 3
/* GCC register naming convention:
x16 = ip0 (scratch)
x17 = ip1 (scratch)
x29 = fp (frame pointer)
x30 = lr (link register)
x31 = sp (stack pointer) */
#define FP 29
#define LR 30
/* Register number 31 names the zero register here (WZR = 32-bit view, XZR = 64-bit view). */
#define WZR 31
#define XZR WZR
/* Bit mask with bits 0..18 set: one bit per caller-saved register x0-x18. */
#define CALLER_SAVED_REGS 0x7ffff
/* Temporary host register; note this is the same register number as LR (30). */
#define HOST_TEMPREG 30
// Note: FP is set to &dynarec_local when executing generated code.
// Thus the local variables are actually global and not on the stack.
/* log2 of the translation cache size in bytes. */
#define TARGET_SIZE_2 25 // 2^25 = 32 megabytes
/* Size in bytes reserved for the jump table: twice the size of jump_table_symbols.
   NOTE(review): presumably each symbol occupies two pointer-sized slots
   (one instruction word, one address) - confirm against the table emitter. */
#define JUMP_TABLE_SIZE (sizeof(jump_table_symbols)*2)
#endif /* M64P_DEVICE_R4300_NEW_DYNAREC_ARM_ASSEM_ARM64_H */

View File

@ -0,0 +1,428 @@
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* Mupen64plus - linkage_arm64.S *
* Copyright (C) 2009-2018 Gillou68310 *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write to the *
* Free Software Foundation, Inc., *
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/* Declare `name` as a global function symbol with hidden visibility.
 * The expansion ends with the bare symbol name, so the use site supplies
 * the trailing ':' that turns it into a label:  GLOBAL_FUNCTION(foo): */
#define GLOBAL_FUNCTION(name) \
.globl name; \
.hidden name; \
.type name, %function; \
name
/* Same as GLOBAL_FUNCTION but without the .globl directive. */
#define LOCAL_FUNCTION(name) \
.hidden name; \
.type name, %function; \
name
/* Declare `name` as a global, hidden data object of `size_` bytes. */
#define GLOBAL_VARIABLE(name, size_) \
.global name; \
.hidden name; \
.type name, %object; \
.size name, size_
/* Load a 32-bit immediate into register Wn with two instructions:
 * movz writes bits 31:16 and zeroes the remaining bits, movk then inserts
 * bits 15:0. `imm` is substituted textually into both expressions, so the
 * caller should pass it parenthesized (as new_dyna_start does). */
.macro movl Wn, imm
movz \Wn, (\imm >> 16) & 0xFFFF, lsl 16
movk \Wn, \imm & 0xFFFF
.endm
/* Section helpers: code goes into .text; END_SECTION expands to nothing. */
#define TEXT_SECTION .text
#define END_SECTION
#include "asm_defines_gas.h"
/* Byte offset of dynarec_local from the start of g_dev, built by chaining the
 * generated offsetof_* constants: device -> r4300 -> new_dynarec_hot_state ->
 * dynarec_local. */
device_r4300_new_dynarec_hot_state_dynarec_local = (offsetof_struct_device_r4300 + offsetof_struct_r4300_core_new_dynarec_hot_state + offsetof_struct_new_dynarec_hot_state_dynarec_local)
/* Byte offset, relative to dynarec_local, of the area where new_dyna_start
 * saves x19-x30 (12 registers, 96 bytes) and from which cc_interrupt restores
 * them when stopping. */
saved_context = 160
/* Defines offsets for fp addressed variables */
/* NOTE(review): each fp_* value is an offset from the start of
 * struct new_dynarec_hot_state, while x29 is set to the address of
 * dynarec_local in new_dyna_start. This presumes dynarec_local is the first
 * member of the struct - confirm. */
fp_next_interrupt = offsetof_struct_new_dynarec_hot_state_next_interrupt
fp_cycle_count = offsetof_struct_new_dynarec_hot_state_cycle_count
fp_pending_exception = offsetof_struct_new_dynarec_hot_state_pending_exception
fp_pcaddr = offsetof_struct_new_dynarec_hot_state_pcaddr
fp_stop = offsetof_struct_new_dynarec_hot_state_stop
fp_invc_ptr = offsetof_struct_new_dynarec_hot_state_invc_ptr
fp_fcr0 = offsetof_struct_new_dynarec_hot_state_fcr0
fp_fcr31 = offsetof_struct_new_dynarec_hot_state_fcr31
fp_regs = offsetof_struct_new_dynarec_hot_state_regs
fp_hi = offsetof_struct_new_dynarec_hot_state_hi
fp_lo = offsetof_struct_new_dynarec_hot_state_lo
fp_cp0_regs = offsetof_struct_new_dynarec_hot_state_cp0_regs
fp_cp1_regs_simple = offsetof_struct_new_dynarec_hot_state_cp1_regs_simple
fp_cp1_regs_double = offsetof_struct_new_dynarec_hot_state_cp1_regs_double
fp_rounding_modes = offsetof_struct_new_dynarec_hot_state_rounding_modes
fp_branch_target = offsetof_struct_new_dynarec_hot_state_branch_target
fp_pc = offsetof_struct_new_dynarec_hot_state_pc
fp_fake_pc = offsetof_struct_new_dynarec_hot_state_fake_pc
fp_mini_ht = offsetof_struct_new_dynarec_hot_state_mini_ht
fp_restore_candidate = offsetof_struct_new_dynarec_hot_state_restore_candidate
fp_memory_map = offsetof_struct_new_dynarec_hot_state_memory_map
TEXT_SECTION
/* jump_vaddr_xN: one entry point per host register x0-x28.
 * Each entry copies the 32-bit value held in wN into w0 and branches to
 * jump_vaddr (the x0 entry needs no copy). The x7 entry is placed last so
 * that it falls straight through into jump_vaddr. */
GLOBAL_FUNCTION(jump_vaddr_x0):
b jump_vaddr
GLOBAL_FUNCTION(jump_vaddr_x1):
mov w0, w1
b jump_vaddr
GLOBAL_FUNCTION(jump_vaddr_x2):
mov w0, w2
b jump_vaddr
GLOBAL_FUNCTION(jump_vaddr_x3):
mov w0, w3
b jump_vaddr
GLOBAL_FUNCTION(jump_vaddr_x4):
mov w0, w4
b jump_vaddr
GLOBAL_FUNCTION(jump_vaddr_x5):
mov w0, w5
b jump_vaddr
GLOBAL_FUNCTION(jump_vaddr_x6):
mov w0, w6
b jump_vaddr
GLOBAL_FUNCTION(jump_vaddr_x8):
mov w0, w8
b jump_vaddr
GLOBAL_FUNCTION(jump_vaddr_x9):
mov w0, w9
b jump_vaddr
GLOBAL_FUNCTION(jump_vaddr_x10):
mov w0, w10
b jump_vaddr
GLOBAL_FUNCTION(jump_vaddr_x11):
mov w0, w11
b jump_vaddr
GLOBAL_FUNCTION(jump_vaddr_x12):
mov w0, w12
b jump_vaddr
GLOBAL_FUNCTION(jump_vaddr_x13):
mov w0, w13
b jump_vaddr
GLOBAL_FUNCTION(jump_vaddr_x14):
mov w0, w14
b jump_vaddr
GLOBAL_FUNCTION(jump_vaddr_x15):
mov w0, w15
b jump_vaddr
GLOBAL_FUNCTION(jump_vaddr_x16):
mov w0, w16
b jump_vaddr
GLOBAL_FUNCTION(jump_vaddr_x17):
mov w0, w17
b jump_vaddr
/* Not expected to be reached: traps immediately via brk. */
GLOBAL_FUNCTION(jump_vaddr_x18):
brk 0 /* trampoline jumps use x18 */
mov w0, w18
b jump_vaddr
GLOBAL_FUNCTION(jump_vaddr_x19):
mov w0, w19
b jump_vaddr
/* Not expected to be reached: x20 holds the cycle count, so a target address
 * there indicates an error; traps immediately via brk. */
GLOBAL_FUNCTION(jump_vaddr_x20):
brk 0 /*address in cycle count*/
mov w0, w20
b jump_vaddr
GLOBAL_FUNCTION(jump_vaddr_x21):
mov w0, w21
b jump_vaddr
GLOBAL_FUNCTION(jump_vaddr_x22):
mov w0, w22
b jump_vaddr
GLOBAL_FUNCTION(jump_vaddr_x23):
mov w0, w23
b jump_vaddr
GLOBAL_FUNCTION(jump_vaddr_x24):
mov w0, w24
b jump_vaddr
GLOBAL_FUNCTION(jump_vaddr_x25):
mov w0, w25
b jump_vaddr
GLOBAL_FUNCTION(jump_vaddr_x26):
mov w0, w26
b jump_vaddr
GLOBAL_FUNCTION(jump_vaddr_x27):
mov w0, w27
b jump_vaddr
GLOBAL_FUNCTION(jump_vaddr_x28):
mov w0, w28
b jump_vaddr
GLOBAL_FUNCTION(jump_vaddr_x7):
mov w0, w7
/* jump_vaddr: w0 = value passed to get_addr_ht, w20 = cycle count.
 * Saves the cycle count, publishes Count = next_interrupt + cycle count,
 * calls get_addr_ht, restores the cycle count and jumps to the address
 * returned in x0. Uses w18 as scratch; x30 is clobbered by the call. */
GLOBAL_FUNCTION(jump_vaddr):
str w20, [x29, #fp_cycle_count]
ldr w18, [x29, #fp_next_interrupt]
add w20, w18, w20 /* Count */
str w20, [x29, #fp_cp0_regs+36] /* Count */
bl get_addr_ht
ldr w20, [x29, #fp_cycle_count]
br x0
/* verify_code_ds / verify_code_vm / verify_code
 * Compare `length` bytes at `source` against `copy`. If they are equal the
 * routine returns to its caller; otherwise it calls get_addr (w0 is left
 * untouched up to that point) and jumps to the address it returns.
 * verify_code_ds and verify_code_vm are two names for the same entry, which
 * falls through into verify_code after the memory_map checks. */
GLOBAL_FUNCTION(verify_code_ds):
GLOBAL_FUNCTION(verify_code_vm):
/* w0 = instruction pointer (virtual address) */
/* w1 = source (virtual address) */
/* x2 = copy */
/* w3 = length */
/* Signed 32-bit comparison: sources below 0xC0000000 skip the memory_map checks. */
mov w4, #0xC0000000
cmp w1, w4
b.lt verify_code
add x8, x29, #fp_memory_map
lsr w4, w1, #12 /* index of the first 4 KiB page */
add w5, w1, w3
sub w5, w5, #1
ldr x6, [x8, x4, lsl #3] /* memory_map entry of the first page (8-byte entries) */
lsr w5, w5, #12 /* index of the last page touched */
tst x6, x6
b.mi .D4 /* negative entry: take the mismatch path */
mov x7, x6
add x1, x1, x6, lsl #2 /* source += entry << 2 */
lsl x6, x6, #2
/* Every following page in the range must have the same map entry as the
 * first one, otherwise take the mismatch path. */
.C1:
add w4, w4, #1
cmp x6, x7, lsl #2
b.ne .D4
ldr x7, [x8, x4, lsl #3]
cmp w4, w5
b.ls .C1
/*TODO: Optimize for 64bit*/
GLOBAL_FUNCTION(verify_code):
/* x1 = source */
/* x2 = copy */
/* w3 = length */
/* NOTE(review): the end pointer below is formed from the full x3, so this
 * relies on the upper 32 bits of x3 being zero; the loop also appears to
 * assume length is a multiple of 4 - confirm. */
tst w3, #4 /* flags are consumed by b.eq .D1 below; add/mov do not alter them */
add x3, x1, x3 /* x3 = end of source */
mov w4, #0
mov w5, #0
mov w12, #0
b.eq .D1
/* Bit 2 of length set: preload one word from each buffer. */
ldr w4, [x1], #4
ldr w5, [x2], #4
.D1:
cmp x1, x3
b.eq .D3
/* Main loop: consumes two words from each buffer per iteration; each
 * comparison is deferred by one step (w4/w5 and the w12 difference are
 * checked at the top of the next pass or after the loop). */
.D2:
ldr w7, [x1], #4
eor w9, w4, w5
ldr w8, [x2], #4
orr w9, w9, w12
tst w9, w9
b.ne .D4
ldr w4, [x1], #4
eor w12, w7, w8
ldr w5, [x2], #4
cmp x1, x3
b.cc .D2
cmp w7, w8
b.ne .D4
.D3:
cmp w4, w5
b.eq .D5
/* Mismatch: call get_addr and jump to the returned address (x30 is
 * overwritten by bl; this path does not return to the caller). */
.D4:
bl get_addr
br x0
/* Match: return to the caller. */
.D5:
ret
/* cc_interrupt: w20 = cycle count on entry.
 * Publishes Count = next_interrupt + cycle count, clears pending_exception,
 * processes one word of the restore_candidate bitmap if it is non-zero, then
 * calls dynarec_gen_interrupt. Afterwards:
 *   - stop != 0:              restore x19-x30 from saved_context and return
 *                             through the restored link register;
 *   - pending_exception != 0: jump to get_addr_ht(pcaddr);
 *   - otherwise:              return to the caller with w20 = Count - next_interrupt.
 * x20 temporarily holds the link register across the calls; w21-w23 are used
 * as scratch on the .E4 path. */
GLOBAL_FUNCTION(cc_interrupt):
ldr w0, [x29, #fp_next_interrupt]
add w20, w0, w20 /* Count */
str wzr, [x29, #fp_pending_exception]
/* w1 = (Count >> 19) & 0x1fc: byte offset of a 32-bit word in restore_candidate */
mov w1, #0x1fc
lsr w0, w20, #19
and w1, w1, w0
add x0, x29, #fp_restore_candidate
str w20, [x29, #fp_cp0_regs+36] /* Count */
ldr w22, [x1, x0]
mov x20, x30 /* Save link register */
tst w22, w22
b.ne .E4
.E1:
bl dynarec_gen_interrupt
mov x30, x20 /* Restore link register */
ldr w20, [x29, #fp_cp0_regs+36] /* Count */
ldr w0, [x29, #fp_next_interrupt]
ldr w1, [x29, #fp_pending_exception]
ldr w2, [x29, #fp_stop]
sub w20, w20, w0 /* cycle count = Count - next_interrupt */
tst w2, w2
b.ne .E3
tst w1, w1
b.ne .E2
ret
/* Pending exception: continue at the code for pcaddr. */
.E2:
ldr w0, [x29, #fp_pcaddr]
bl get_addr_ht
br x0
/* Stop requested: reload the registers stored by new_dyna_start and return
 * through the restored x30. */
.E3:
add x16, x29, #saved_context
ldp x19,x20,[x16,#0]
ldp x21,x22,[x16,#16]
ldp x23,x24,[x16,#32]
ldp x25,x26,[x16,#48]
ldp x27,x28,[x16,#64]
ldp x29,x30,[x16,#80]
ret
.E4:
/* Move 'dirty' blocks to the 'clean' list */
str wzr, [x1, x0] /* clear the bitmap word that was loaded into w22 */
lsl w21, w1, #3 /* byte offset * 8 = number of the word's first bit */
mov w23, #0
/* Walk the 32 bits of w22 from bit 0 upwards; call clean_blocks(w21 + bit)
 * for every set bit. */
.E5:
tst w22, #1
b.eq .E6
add w0, w21, w23
bl clean_blocks
.E6:
lsr w22, w22, #1
add w23, w23, #1
tst w23, #31
b.ne .E5
b .E1
/* do_interrupt: look up the code address for pcaddr via get_addr_ht, reload
 * the cycle count as w20 = Count - next_interrupt and jump to that address.
 * Does not return to its caller (x30 is overwritten by bl). */
GLOBAL_FUNCTION(do_interrupt):
ldr w0, [x29, #fp_pcaddr]
bl get_addr_ht
ldr w1, [x29, #fp_next_interrupt]
ldr w20, [x29, #fp_cp0_regs+36] /* Count */
sub w20, w20, w1
br x0
/* fp_exception / fp_exception_ds / jump_syscall
 * Three entry points that differ only in the Cause value placed in w2 and
 * then share the tail at .E7. On entry w0 holds the value stored to EPC.
 * The tail sets bit 1 of Status, stores EPC and Cause, then jumps to
 * get_addr_ht(0x80000180). None of them return to the caller. */
GLOBAL_FUNCTION(fp_exception):
mov w2, #0x10000000
add w2, w2, #0x2c /* Cause = 0x1000002c */
.E7:
ldr w1, [x29, #fp_cp0_regs+48] /* Status */
mov w3, #0x80000000
str w0, [x29, #fp_cp0_regs+56] /* EPC */
orr w1, w1, #2
str w1, [x29, #fp_cp0_regs+48] /* Status */
str w2, [x29, #fp_cp0_regs+52] /* Cause */
add w0, w3, #0x180 /* w0 = 0x80000180 */
bl get_addr_ht
br x0
GLOBAL_FUNCTION(fp_exception_ds):
mov w2, #0x90000000 /* Set high bit if delay slot */
add w2, w2, #0x2c /* Cause = 0x9000002c */
b .E7
GLOBAL_FUNCTION(jump_syscall):
mov w2, #0x20 /* Cause = 0x20 */
b .E7
/* jump_eret: w20 = cycle count on entry.
 * Clears bit 1 of Status, publishes Count, calls new_dynarec_check_interrupt
 * and, if the recomputed cycle count is not negative, runs cc_interrupt
 * first. It then builds a 32-bit mask in w1 describing which registers hold
 * values that are not sign-extended 32-bit values, and jumps to
 * get_addr_32(EPC, mask). Does not return to the caller. */
GLOBAL_FUNCTION(jump_eret):
ldr w1, [x29, #fp_cp0_regs+48] /* Status */
ldr w0, [x29, #fp_next_interrupt]
bic w1, w1, #2
add w20, w0, w20
str w1, [x29, #fp_cp0_regs+48] /* Status */
str w20, [x29, #fp_cp0_regs+36] /* Count */
bl new_dynarec_check_interrupt
ldr w1, [x29, #fp_next_interrupt]
ldr w0, [x29, #fp_cp0_regs+56] /* EPC */
subs w20, w20, w1
b.pl .E11 /* Count - next_interrupt >= 0 */
.E8:
add x6, x29, #fp_regs+256 /* one past regs[31] (32 entries of 8 bytes) */
mov w5, #248 /* 31 iterations: regs[31] down to regs[1] */
mov w1, #0
.E9:
/*TODO: Optimize for 64bit*/
sub x6, x6, #8
ldr w2, [x6] /* word at offset 0 of the entry */
ldr w3, [x6, #4] /* word at offset 4 of the entry */
eor w3, w3, w2, asr #31 /* zero iff the second word is the sign extension of the first */
subs w3, w3, #1 /* carry set iff w3 was non-zero */
adc w1, w1, w1 /* shift the mask left and append the carry */
subs w5, w5, #8
b.ne .E9
/* Final mask bit: set if hi, or otherwise lo, is not sign-extended. */
ldr w2, [x29, #fp_hi]
ldr w3, [x29, #fp_hi+4]
eor w3, w3, w2, asr #31
tst w3, w3
b.ne .E10
ldr w2, [x29, #fp_lo]
ldr w3, [x29, #fp_lo+4]
eor w3, w3, w2, asr #31
.E10:
subs w3, w3, #1
adc w1, w1, w1
bl get_addr_32
br x0
/* EPC is parked in pcaddr across the cc_interrupt call and reloaded afterwards. */
.E11:
str w0, [x29, #fp_pcaddr]
bl cc_interrupt
ldr w0, [x29, #fp_pcaddr]
b .E8
/* new_dyna_start: entry from C into generated code.
 * Saves x19-x30 into the saved_context area of dynarec_local, points x29 at
 * dynarec_local, compiles the block at 0xa4000040 with new_recompile_block,
 * loads the cycle count (Count - next_interrupt) into w20 and jumps to the
 * address stored in base_addr. The saved registers are restored by the .E3
 * path of cc_interrupt. */
GLOBAL_FUNCTION(new_dyna_start):
adrp x16, g_dev
add x16, x16, :lo12:g_dev
movl x1, (device_r4300_new_dynarec_hot_state_dynarec_local + saved_context)
add x16, x16, x1 /* x16 = address of the saved_context area */
adrp x1, base_addr
add x1, x1, :lo12:base_addr
mov w0, #0xa4000000
stp x19,x20,[x16,#0]
stp x21,x22,[x16,#16]
stp x23,x24,[x16,#32]
stp x25,x26,[x16,#48]
stp x27,x28,[x16,#64]
stp x29,x30,[x16,#80]
sub x29, x16, #saved_context /* x29 = address of dynarec_local */
ldr x19, [x1] /* x19 = value of base_addr; callee-saved, so it survives the call below */
add w0, w0, #0x40 /* w0 = 0xa4000040 */
bl new_recompile_block
ldr w0, [x29, #fp_next_interrupt]
ldr w20, [x29, #fp_cp0_regs+36] /* Count */
sub w20, w20, w0
br x19
/* breakpoint: raise a debug trap with brk 0; returns to the caller if
 * execution is resumed after the trap. */
GLOBAL_FUNCTION(breakpoint):
brk 0
ret
END_SECTION

View File

@ -1971,7 +1971,15 @@ static void *dynamic_linker(void * src, u_int vaddr)
while(head!=NULL) {
if(head->vaddr==vaddr&&head->reg32==0) {
#if NEW_DYNAREC == NEW_DYNAREC_ARM64
//TODO: Avoid disabling link between blocks for conditional branches
int *ptr=(int*)src;
if((*ptr&0xfc000000)==0x14000000) { //b
add_link(vaddr, add_pointer(src,head->addr));
}
#else
add_link(vaddr, add_pointer(src,head->addr));
#endif
return head->addr;
}
head=head->next;
@ -2589,11 +2597,13 @@ static void shift_alloc(struct regstat *current,int i)
if(rs2[i]) alloc_reg(current,i,rs2[i]);
alloc_reg64(current,i,rt1[i]);
current->is32&=~(1LL<<rt1[i]);
#if NEW_DYNAREC!=NEW_DYNAREC_ARM64
if(opcode2[i]==0x16||opcode2[i]==0x17) // DSRLV and DSRAV need a temporary register
{
alloc_reg_temp(current,i,-1);
minimum_free_regs[i]=1;
}
#endif
}
clear_const(current,rs1[i]);
clear_const(current,rs2[i]);
@ -4834,7 +4844,7 @@ static void wb_invalidate(signed char pre[],signed char entry[],uint64_t dirty,u
if(pre[hr]>=0&&(pre[hr]&63)<TEMPREG) {
int nr;
if((nr=get_reg(entry,pre[hr]))>=0) {
#if NEW_DYNAREC == NEW_DYNAREC_X64
#ifdef NATIVE_64
if(pre[hr]>=INVCP) emit_mov64(hr,nr);
else
#endif
@ -5946,10 +5956,19 @@ static void do_ccstub(int n)
if(internal_branch(branch_regs[i].is32,ba[i]))
load_needed_regs(branch_regs[i].regmap,regs[(ba[i]-start)>>2].regmap_entry);
else if(itype[i]==RJUMP) {
if(get_reg(branch_regs[i].regmap,RTEMP)>=0)
emit_readword((intptr_t)&g_dev.r4300.new_dynarec_hot_state.pcaddr,get_reg(branch_regs[i].regmap,RTEMP));
else
emit_loadreg(rs1[i],get_reg(branch_regs[i].regmap,rs1[i]));
int r=get_reg(branch_regs[i].regmap,rs1[i]);
if((rs1[i]==rt1[i+1]||rs1[i]==rt2[i+1])&&(rs1[i]!=0)) {
r=get_reg(branch_regs[i].regmap,RTEMP);
}
#if NEW_DYNAREC==NEW_DYNAREC_ARM64
if(r==18) {
// x18 is used for trampoline jumps, move it to another register (x0)
emit_mov(r,0);
r=0;
stubs[n][2]=jump_vaddr_reg[0];
}
#endif
emit_readword((intptr_t)&g_dev.r4300.new_dynarec_hot_state.pcaddr,r);
}
}else if(stubs[n][6]==NOTTAKEN) {
if(i<slen-2) load_needed_regs(branch_regs[i].regmap,regmap_pre[i+2]);
@ -6027,17 +6046,11 @@ static void ujump_assemble(int i,struct regstat *i_regs)
if(rt>=0) {
#ifdef USE_MINI_HT
if(internal_branch(branch_regs[i].is32,return_address)) {
int temp=rt+1;
if(temp==EXCLUDE_REG||temp>=HOST_REGS||
branch_regs[i].regmap[temp]>=0)
{
temp=get_reg(branch_regs[i].regmap,-1);
}
int temp=-1; //x86 doesn't need a temp reg
#ifdef HOST_TEMPREG
if(temp<0) temp=HOST_TEMPREG;
#endif
if(temp>=0) do_miniht_insert(return_address,rt,temp);
else emit_movimm(return_address,rt);
do_miniht_insert(return_address,rt,temp);
}
else
#endif
@ -6180,8 +6193,13 @@ static void rjump_assemble(int i,struct regstat *i_regs)
else
#endif
{
//if(rs!=EAX) emit_mov(rs,EAX);
//emit_jmp((intptr_t)jump_vaddr_eax);
#if NEW_DYNAREC==NEW_DYNAREC_ARM64
if(rs==18) {
// x18 is used for trampoline jumps, move it to another register (x0)
emit_mov(rs,0);
rs=0;
}
#endif
emit_jmp(jump_vaddr_reg[rs]);
}
/* Check hash table
@ -7571,15 +7589,25 @@ void new_dynarec_init(void)
#if NEW_DYNAREC >= NEW_DYNAREC_ARM
#if !defined(WIN32)
if ((base_addr = mmap ((u_char *)g_dev.r4300.extra_memory, 1<<TARGET_SIZE_2,
PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
-1, 0)) <= 0) {DebugMessage(M64MSG_ERROR, "mmap() failed");}
#define FIXED_CACHE_ADDR
#ifdef FIXED_CACHE_ADDR
base_addr = mmap ((u_char *)g_dev.r4300.extra_memory, 1<<TARGET_SIZE_2,
PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
-1, 0);
#else
base_addr = mmap (NULL, 1<<TARGET_SIZE_2,
PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_PRIVATE | MAP_ANONYMOUS,
-1, 0);
#endif
if(base_addr==(void*)-1) DebugMessage(M64MSG_ERROR, "mmap() failed");
#endif
#else
#if defined(WIN32)
DWORD dummy;
assert(VirtualProtect((void*)g_dev.r4300.extra_memory, 33554432, PAGE_EXECUTE_READWRITE, &dummy));
BOOL res=VirtualProtect((void*)g_dev.r4300.extra_memory, 33554432, PAGE_EXECUTE_READWRITE, &dummy);
assert(res!=0);
base_addr = (void*)g_dev.r4300.extra_memory;
#else
if ((base_addr = mmap (NULL, 1<<TARGET_SIZE_2,
@ -7589,7 +7617,7 @@ void new_dynarec_init(void)
#endif
#endif
assert(((uintptr_t)g_dev.rdram.dram&3)==0); // 4 bytes aligned
assert(((uintptr_t)g_dev.rdram.dram&(sizeof(uintptr_t)-1))==0); // 4/8 bytes aligned
out=(u_char *)base_addr;
g_dev.r4300.new_dynarec_hot_state.pc = &g_dev.r4300.new_dynarec_hot_state.fake_pc;
@ -8555,7 +8583,7 @@ int new_recompile_block(int addr)
#ifdef USE_MINI_HT
if(rs1[i]==31) { // JALR
alloc_reg(&current,i,RHASH);
#ifndef HOST_IMM_ADDR32
#if !defined(HOST_IMM_ADDR32)&&(NEW_DYNAREC!=NEW_DYNAREC_X64)
alloc_reg(&current,i,RHTBL);
#endif
}
@ -8957,7 +8985,7 @@ int new_recompile_block(int addr)
#ifdef USE_MINI_HT
if(rs1[i-1]==31) { // JALR
alloc_reg(&branch_regs[i-1],i-1,RHASH);
#ifndef HOST_IMM_ADDR32
#if !defined(HOST_IMM_ADDR32)&&(NEW_DYNAREC!=NEW_DYNAREC_X64)
alloc_reg(&branch_regs[i-1],i-1,RHTBL);
#endif
}
@ -10774,7 +10802,13 @@ int new_recompile_block(int addr)
void *stub=out;
void *addr=check_addr(link_addr[i][1]);
emit_extjump(link_addr[i][0],link_addr[i][1]);
#if NEW_DYNAREC==NEW_DYNAREC_ARM64
//TODO: Avoid disabling link between blocks for conditional branches
u_char *ptr=(u_char *)link_addr[i][0];
if(addr&&((ptr[3]&0xfc)==0x14)) {
#else
if(addr) {
#endif
set_jump_target(link_addr[i][0],(intptr_t)addr);
add_link(link_addr[i][1],stub);
}

View File

@ -47,7 +47,7 @@ struct new_dynarec_hot_state
#ifdef NEW_DYNAREC
/* 0-6: used by dynarec to push/pop caller-saved register (r0-r3, r12) and possibly lr (see invalidate_addr)
7-15: saved_context*/
#if NEW_DYNAREC == NEW_DYNAREC_ARM64
#if (NEW_DYNAREC == NEW_DYNAREC_ARM64) || (NEW_DYNAREC == NEW_DYNAREC_X64)
uint64_t dynarec_local[32];
#else
uint32_t dynarec_local[16];

View File

@ -33,8 +33,10 @@
#error arm dynarec debug only available on x86
#endif
#elif RECOMPILER_DEBUG == 4 //ARM64
#if NEW_DYNAREC != 2 //x64
#error arm64 dynarec debug only available on x64
#endif
#endif
#undef NEW_DYNAREC
#define NEW_DYNAREC RECOMPILER_DEBUG
@ -121,35 +123,6 @@ void jump_vaddr_x25(void){}
void jump_vaddr_x26(void){}
void jump_vaddr_x27(void){}
void jump_vaddr_x28(void){}
void invalidate_addr_x0(void){}
void invalidate_addr_x1(void){}
void invalidate_addr_x2(void){}
void invalidate_addr_x3(void){}
void invalidate_addr_x4(void){}
void invalidate_addr_x5(void){}
void invalidate_addr_x6(void){}
void invalidate_addr_x7(void){}
void invalidate_addr_x8(void){}
void invalidate_addr_x9(void){}
void invalidate_addr_x10(void){}
void invalidate_addr_x11(void){}
void invalidate_addr_x12(void){}
void invalidate_addr_x13(void){}
void invalidate_addr_x14(void){}
void invalidate_addr_x15(void){}
void invalidate_addr_x16(void){}
void invalidate_addr_x17(void){}
void invalidate_addr_x18(void){}
void invalidate_addr_x19(void){}
void invalidate_addr_x20(void){}
void invalidate_addr_x21(void){}
void invalidate_addr_x22(void){}
void invalidate_addr_x23(void){}
void invalidate_addr_x24(void){}
void invalidate_addr_x25(void){}
void invalidate_addr_x26(void){}
void invalidate_addr_x27(void){}
void invalidate_addr_x28(void){}
static void __clear_cache(char* begin, char *end){}
#endif
@ -169,7 +142,12 @@ ALIGN(4096, static char recomp_dbg_extra_memory[33554432]);
#define BRANCH_INST 0x10a
#define ARCH_NAME "x86"
#elif RECOMPILER_DEBUG == NEW_DYNAREC_X64
#error to be done
#define ARCHITECTURE CS_ARCH_X86
#define MODE CS_MODE_64
#define INSTRUCTION instr[i].detail->x86
#define CALL_INST 0x38
#define BRANCH_INST 0x10a
#define ARCH_NAME "x64"
#elif RECOMPILER_DEBUG == NEW_DYNAREC_ARM
#define ARCHITECTURE CS_ARCH_ARM
#define MODE CS_MODE_LITTLE_ENDIAN
@ -451,35 +429,6 @@ static Function_t func[] = {
{(intptr_t)jump_vaddr_x26, "jump_vaddr_x26"},
{(intptr_t)jump_vaddr_x27, "jump_vaddr_x27"},
{(intptr_t)jump_vaddr_x28, "jump_vaddr_x28"},
{(intptr_t)invalidate_addr_x0," invalidate_addr_x0"},
{(intptr_t)invalidate_addr_x1," invalidate_addr_x1"},
{(intptr_t)invalidate_addr_x2," invalidate_addr_x2"},
{(intptr_t)invalidate_addr_x3," invalidate_addr_x3"},
{(intptr_t)invalidate_addr_x4," invalidate_addr_x4"},
{(intptr_t)invalidate_addr_x5," invalidate_addr_x5"},
{(intptr_t)invalidate_addr_x6," invalidate_addr_x6"},
{(intptr_t)invalidate_addr_x7," invalidate_addr_x7"},
{(intptr_t)invalidate_addr_x8," invalidate_addr_x8"},
{(intptr_t)invalidate_addr_x9," invalidate_addr_x9"},
{(intptr_t)invalidate_addr_x10," invalidate_addr_x10"},
{(intptr_t)invalidate_addr_x11," invalidate_addr_x11"},
{(intptr_t)invalidate_addr_x12," invalidate_addr_x12"},
{(intptr_t)invalidate_addr_x13," invalidate_addr_x13"},
{(intptr_t)invalidate_addr_x14," invalidate_addr_x14"},
{(intptr_t)invalidate_addr_x15," invalidate_addr_x15"},
{(intptr_t)invalidate_addr_x16," invalidate_addr_x16"},
{(intptr_t)invalidate_addr_x17," invalidate_addr_x17"},
{(intptr_t)invalidate_addr_x18," invalidate_addr_x18"},
{(intptr_t)invalidate_addr_x19," invalidate_addr_x19"},
{(intptr_t)invalidate_addr_x20," invalidate_addr_x20"},
{(intptr_t)invalidate_addr_x21," invalidate_addr_x21"},
{(intptr_t)invalidate_addr_x22," invalidate_addr_x22"},
{(intptr_t)invalidate_addr_x23," invalidate_addr_x23"},
{(intptr_t)invalidate_addr_x24," invalidate_addr_x24"},
{(intptr_t)invalidate_addr_x25," invalidate_addr_x25"},
{(intptr_t)invalidate_addr_x26," invalidate_addr_x26"},
{(intptr_t)invalidate_addr_x27," invalidate_addr_x27"},
{(intptr_t)invalidate_addr_x28," invalidate_addr_x28"},
#endif
{(intptr_t)dyna_linker, "dyna_linker"},
{(intptr_t)dyna_linker_ds, "dyna_linker_ds"},
@ -576,6 +525,7 @@ static Function_t func[] = {
{(intptr_t)abs_d, "abs_d"},
{(intptr_t)mov_d, "mov_d"},
{(intptr_t)neg_d, "neg_d"},
{(intptr_t)breakpoint, "breakpoint"},
{-1, NULL}
};
@ -812,13 +762,20 @@ static void debugging(int i, FILE * pFile)
static int disasm_block[] = {0xa4000040};
static void replace_addr(int32_t addr, Variable_t * var, char * op_str, size_t size)
static void replace_addr(intptr_t real_addr, intptr_t addr, size_t addr_size, Variable_t * var, char * op_str, size_t op_size)
{
char right[256];
char addr_str[16];
char addr_str[32];
char * ptr = NULL;
char * ptr2 = NULL;
sprintf(addr_str, "0x%x", addr);
if(addr_size == 4)
sprintf(addr_str, "0x%x", addr);
else if(addr_size == 8)
sprintf(addr_str, "0x%llx", addr);
else
assert(0);
ptr = strstr(op_str, addr_str);
if(ptr == NULL) {
@ -829,13 +786,19 @@ static void replace_addr(int32_t addr, Variable_t * var, char * op_str, size_t s
assert(*(ptr-2) == '-');
*(ptr-2) = '+';
}
*ptr = '\0';
memcpy(right, (ptr + strlen(addr_str)), size - (ptr - op_str)); /* copy right part after address */
if((addr - var->addr) == 0)
snprintf(op_str, size, "%s%s%s", op_str, var->name, right);
ptr2 = strstr(op_str, "rip");
if(ptr2 == NULL)
*ptr = '\0';
else
snprintf(op_str, size, "%s%s+%d%s", op_str, var->name, (addr - var->addr), right);
*ptr2 = '\0';
memcpy(right, (ptr + strlen(addr_str)), op_size - (ptr - op_str)); /* copy right part after address */
if((real_addr - var->addr) == 0)
snprintf(op_str, op_size, "%s%s%s", op_str, var->name, right);
else
snprintf(op_str, op_size, "%s%s+%d%s", op_str, var->name, (real_addr - var->addr), right);
}
void recomp_dbg_init(void)
@ -860,6 +823,35 @@ void recomp_dbg_init(void)
/* Capstone init */
if(cs_open(ARCHITECTURE, MODE, &handle) != CS_ERR_OK) return;
cs_option(handle, CS_OPT_DETAIL, CS_OPT_ON);
#if RECOMPILER_DEBUG >= NEW_DYNAREC_ARM
FILE * pFile = fopen ("jump_table.txt","w");
uintptr_t * src = (uintptr_t *)((char *)base_addr+(1<<TARGET_SIZE_2)-JUMP_TABLE_SIZE);
while((char *)src<(char *)base_addr+(1<<TARGET_SIZE_2))
{
cs_insn *instr;
size_t count = cs_disasm(handle, (uint8_t*)src, sizeof(uintptr_t), (uintptr_t)src, 0, &instr);
for(uint32_t i = 0; i < count; i++)
fprintf(pFile, "0x%" PRIxPTR ": %s %s\n", (uintptr_t)instr[i].address, instr[i].mnemonic, instr[i].op_str);
cs_free(instr, count);
src++;
uint32_t j = 0;
while(func[j].addr != -1) {
if(*src>>2 == func[j].addr>>2)
break;
j++;
}
fprintf(pFile, "0x%" PRIxPTR ": 0x%" PRIxPTR " (%s)\n", (uintptr_t)src, (uintptr_t)*src, func[j].name);
src++;
}
fflush(pFile);
fclose(pFile);
#endif
}
void recomp_dbg_cleanup(void)
@ -903,37 +895,92 @@ extern unsigned int using_tlb;
end=(uint32_t *)recomp_dbg_out;
#if 0
struct ll_entry *head;
u_int page;
for(int i=0;i<linkcount;i++){
if(!link_addr[i][2])
dynamic_linker((void*)link_addr[i][0],0xa4000044);
if(!link_addr[i][2]) { //external jumps
int already_linked=0;
page=(0x80000000^link_addr[i][1])>>12;
if(page>262143&&g_dev.r4300.cp0.tlb.LUT_r[link_addr[i][1]>>12]) page=(g_dev.r4300.cp0.tlb.LUT_r[page^0x80000]^0x80000000)>>12;
if(page>2048) page=2048+(page&2047);
head=jump_in[page];
while(head!=NULL) {
if(head->vaddr==link_addr[i][1]&&head->reg32==0) {
already_linked=1;
break;
}
head=head->next;
}
if(already_linked==0)
dynamic_linker((void*)link_addr[i][0],addr); //linking on itself
}
}
for(int i = 0; i < 4096; i++)
{
struct ll_entry *head;
head=jump_out[i];
while(head!=NULL) {
intptr_t addr=get_pointer(head->addr);
addr=(intptr_t)kill_pointer(head->addr);
head=head->next;
}
page=(0x80000000^addr)>>12;
if(page>262143&&g_dev.r4300.cp0.tlb.LUT_r[addr>>12]) page=(g_dev.r4300.cp0.tlb.LUT_r[page^0x80000]^0x80000000)>>12;
if(page>2048) page=2048+(page&2047);
head=jump_dirty[i];
while(head!=NULL) {
verify_dirty(head->addr);
//Check current recompiled address (not all entry points)
int found=0;
head=jump_in[page];
while(head!=NULL) {
if(head->vaddr==addr&&head->reg32==0) {
assert(found==0); //No possible duplicates in jump_in
assert(isclean(head->addr)==1); //Just being recompiled so not dirty
found=1;
}
head=head->next;
}
page=(0x80000000^addr)>>12;
if(page>262143&&g_dev.r4300.cp0.tlb.LUT_r[addr>>12]) page&=2047; // jump_dirty uses a hash of the virtual address instead
if(page>2048) page=2048+(page&2047);
int not_dirty=0;
found=0;
head=jump_dirty[page];
while(head!=NULL) {
if(head->vaddr==addr&&head->reg32==0) {
if(found==1)
printf("Duplicates in jump_dirty\n");
//Possible duplicates in jump_dirty, only one could be clean at a time
if(not_dirty==0)
not_dirty=verify_dirty(head->addr);
else
{
assert(verify_dirty(head->addr)==0);
}
assert(isclean(head->addr)==0);
void* clean=get_clean_addr(head->addr);
assert(isclean(clean)==1);
//TODO: assert get_bounds
uintptr_t start,end;
get_bounds(head->addr, &start, &end);
isclean(head->addr);
void* clean=get_clean_addr(head->addr);
head=head->next;
}
head=jump_in[i];
while(head!=NULL) {
isclean(head->addr);
head=head->next;
found=1;
}
head=head->next;
}
page=(addr^0x80000000)>>12;
if(page>262143&&g_dev.r4300.cp0.tlb.LUT_r[addr>>12]) page=(g_dev.r4300.cp0.tlb.LUT_r[addr>>12]^0x80000000)>>12;
if(page>4095) page=2048+(page&2047);
head=jump_out[page];
while(head!=NULL) {
if(head->vaddr==addr&&head->reg32==0) {
intptr_t addr=get_pointer(head->addr);
addr=(intptr_t)kill_pointer(head->addr);
}
head=head->next;
}
#endif
int disasm=0;
@ -1004,32 +1051,39 @@ extern unsigned int using_tlb;
}
}
#else
if(INSTRUCTION.disp || INSTRUCTION.operands[1].imm) {
if(INSTRUCTION.disp /*|| (INSTRUCTION.operands[1].imm && INSTRUCTION.operands[1].type == X86_OP_IMM)*/) {
char op_str[256];
uint32_t j = 0;
uint32_t off = 0;
int64_t addr = INSTRUCTION.disp;
strcpy(op_str, instr[i].op_str);
char *ptr = strstr(op_str, "rip");
if(ptr != NULL) // rip relative
addr = addr + instr[i+1].address;
while(var[j].addr != -1) {
if(INSTRUCTION.disp >= var[j].addr && INSTRUCTION.disp < (var[j].addr + var[j].size))
if(addr >= var[j].addr && addr < (var[j].addr + var[j].size))
break;
j++;
}
//TODO: remove replace_addr?
if(var[j].addr != -1)
replace_addr(INSTRUCTION.disp, &var[j], op_str, sizeof(op_str));
replace_addr(addr, INSTRUCTION.disp, sizeof(INSTRUCTION.disp), &var[j], op_str, sizeof(op_str));
uint32_t k = 0;
while(var[k].addr != -1) {
if(INSTRUCTION.operands[1].imm >= var[k].addr && INSTRUCTION.operands[1].imm < (var[k].addr + var[k].size))
break;
k++;
}
//uint32_t k = 0;
//while(var[k].addr != -1) {
// if(INSTRUCTION.operands[1].imm >= var[k].addr && INSTRUCTION.operands[1].imm < (var[k].addr + var[k].size))
// break;
// k++;
//}
//
//if(var[k].addr != -1)
// replace_addr(addr, INSTRUCTION.operands[1].imm, sizeof(INSTRUCTION.operands[1].imm), &var[k], op_str, sizeof(op_str));
if(var[k].addr != -1)
replace_addr(INSTRUCTION.operands[1].imm, &var[k], op_str, sizeof(op_str));
if((var[j].addr != -1) || (var[k].addr != -1))
if((var[j].addr != -1) /*|| (var[k].addr != -1)*/)
{
fprintf(pFile, "0x%" PRIxPTR ": %s %s\n", (uintptr_t)instr[i].address, instr[i].mnemonic, op_str);
continue;
@ -1040,11 +1094,11 @@ extern unsigned int using_tlb;
if(instr[i].id == CALL_INST || instr[i].id == BRANCH_INST) {
uint32_t j = 0;
intptr_t addr = (intptr_t)INSTRUCTION.operands[0].imm;
int * paddr = (int*)addr;
intptr_t * paddr = (intptr_t*)addr;
while(func[j].addr != -1) {
#if RECOMPILER_DEBUG == NEW_DYNAREC_ARM
if((addr>>2 == func[j].addr>>2) || ((*(paddr+1))>>2 == func[j].addr>>2)) // check jump_table_symbols on ARM
#if RECOMPILER_DEBUG >= NEW_DYNAREC_ARM
if((addr>>2 == func[j].addr>>2) || ((*(paddr+1))>>2 == func[j].addr>>2)) // check jump_table_symbols on ARM
#else
if(addr>>2 == func[j].addr>>2)
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,77 @@
#ifndef M64P_DEVICE_R4300_NEW_DYNAREC_X64_ASSEM_X64_H
#define M64P_DEVICE_R4300_NEW_DYNAREC_X64_ASSEM_X64_H
/* Host register numbers.
   The E* and R* spellings of the first eight registers are aliases for the
   same number (EAX and RAX are both 0, ..., EDI and RDI are both 7). */
#define EAX 0
#define ECX 1
#define EDX 2
#define EBX 3
#define ESP 4
#define EBP 5
#define ESI 6
#define EDI 7
#define RAX 0
#define RCX 1
#define RDX 2
#define RBX 3
#define RSP 4
#define RBP 5
#define RSI 6
#define RDI 7
/* The additional x64 registers continue the numbering at 8. */
#define R8 8
#define R9 9
#define R10 10
#define R11 11
#define R12 12
#define R13 13
#define R14 14
#define R15 15
/* Backend configuration switches.
   The code that consumes these macros is not part of this header, so every
   remark tagged NOTE(review) is an assumption to be confirmed. */
// NOTE(review): 8 is fewer than the 16 register numbers defined above;
// presumably only numbers 0-7 are handed to the register allocator -- confirm.
#define HOST_REGS 8
// Register-number aliases: EBP (5), ESP (4) and R15 (15) respectively.
#define HOST_BTREG EBP
#define EXCLUDE_REG ESP
#define HOST_TEMPREG R15
//#define IMM_PREFETCH 1
// Feature flags; each is either defined to 1 or left commented out.
#define NATIVE_64 1
#define RAM_OFFSET 1
#define NEED_INVC_PTR 1
#define INVERTED_CARRY 1
//#define DESTRUCTIVE_WRITEBACK 1
#define DESTRUCTIVE_SHIFT 1
#define USE_MINI_HT 1
#define TARGET_SIZE_2 25 // 2^25 = 32 megabytes
#define JUMP_TABLE_SIZE 0 // Not needed for x86
/* Calling-convention dependent settings.
   ARGn_REG      register number carrying the n-th integer argument.
   CALLER_SAVED_REGS
                 bit mask with bit n set for register number n; the set bits
                 equal the volatile (caller-saved) list quoted in each branch.
   HOST_CCREG    register number passed to alloc_x86_reg() for the cycle
                 count; in both branches it is taken from the nonvolatile
                 (callee-saved) list. */
#ifdef _MSC_VER
/* Microsoft x64 calling convention:
func(rcx, rdx, r8, r9) {return rax;}
callee-save: %rbx %rbp %rdi %rsi %rsp %r12-%r15
The registers RAX, RCX, RDX, R8, R9, R10, R11 are considered volatile (caller-saved).
The registers RBX, RBP, RDI, RSI, RSP, R12, R13, R14, and R15 are considered nonvolatile (callee-saved).*/
#define ARG1_REG ECX
#define ARG2_REG EDX
#define ARG3_REG R8
#define ARG4_REG R9
// 0xF07 = bits 0-2 (RAX, RCX, RDX) | bits 8-11 (R8-R11)
#define CALLER_SAVED_REGS 0xF07
#define HOST_CCREG ESI
#else
/* amd64 calling convention:
func(rdi, rsi, rdx, rcx, r8, r9) {return rax;}
callee-save: %rbp %rbx %r12-%r15
The registers RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11 are considered volatile (caller-saved).
The registers RBX, RBP, RSP, R12, R13, R14, and R15 are considered nonvolatile (callee-saved).*/
#define ARG1_REG EDI
#define ARG2_REG ESI
#define ARG3_REG EDX
#define ARG4_REG ECX
// 0xFC7 = bits 0-2 (RAX, RCX, RDX) | bits 6-7 (RSI, RDI) | bits 8-11 (R8-R11)
#define CALLER_SAVED_REGS 0xFC7
#define HOST_CCREG EBX
#endif
#endif /* M64P_DEVICE_R4300_NEW_DYNAREC_X64_ASSEM_X64_H */

View File

@ -0,0 +1,430 @@
;Mupen64plus - linkage_x64.asm
;Copyright (C) 2009-2011 Ari64
;
;This program is free software; you can redistribute it and/or modify
;it under the terms of the GNU General Public License as published by
;the Free Software Foundation; either version 2 of the License, or
;(at your option) any later version.
;
;This program is distributed in the hope that it will be useful,
;but WITHOUT ANY WARRANTY; without even the implied warranty of
;MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;GNU General Public License for more details.
;
;You should have received a copy of the GNU General Public License
;along with this program; if not, write to the
;Free Software Foundation, Inc.,
;51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
;Assembly-time constants. The offsetof_* symbols used further down are not
;defined in this file; presumably they come from this include -- TODO confirm.
%include "asm_defines_nasm.h"
;cglobal / cextern wrap NASM's `global` / `extern` directives.
;With LEADING_UNDERSCORE defined, the symbol is exported/imported as _name
;and a %define maps the plain name onto it, so the rest of the file can use
;the un-prefixed spelling in either configuration.
%ifdef LEADING_UNDERSCORE
%macro cglobal 1
global _%1
%define %1 _%1
%endmacro
%macro cextern 1
extern _%1
%define %1 _%1
%endmacro
%else
%macro cglobal 1
global %1
%endmacro
%macro cextern 1
extern %1
%endmacro
%endif
;Register aliases for the two supported calling conventions.
;ARGn_REG is the 32-bit name and ARGn_REG64 the 64-bit name of the register
;carrying the n-th integer argument. CCREG / CCREG64 are the 32-/64-bit names
;of the register whose value this file saves to and reloads from cycle_count.
;WIN64 defined: arguments in rcx, rdx, r8, r9; CCREG is rsi/esi.
%ifdef WIN64
%define ARG1_REG ecx
%define ARG2_REG edx
%define ARG3_REG r8d
%define ARG4_REG r9d
%define ARG1_REG64 rcx
%define ARG2_REG64 rdx
%define ARG3_REG64 r8
%define ARG4_REG64 r9
%define CCREG esi
%define CCREG64 rsi
%else
;WIN64 not defined: arguments in rdi, rsi, rdx, rcx; CCREG is rbx/ebx.
%define ARG1_REG edi
%define ARG2_REG esi
%define ARG3_REG edx
%define ARG4_REG ecx
%define ARG1_REG64 rdi
%define ARG2_REG64 rsi
%define ARG3_REG64 rdx
%define ARG4_REG64 rcx
%define CCREG ebx
%define CCREG64 rbx
%endif
;Addresses of individual fields inside the global g_dev object.
;Each one is g_dev plus the chain of member offsets:
;  struct device .r4300 -> struct r4300_core .<member> -> field of that member
;Only the first goes through cached_interp; all others go through
;new_dynarec_hot_state.
%define g_dev_r4300_cached_interp_invalid_code (g_dev + offsetof_struct_device_r4300 + offsetof_struct_r4300_core_cached_interp + offsetof_struct_cached_interp_invalid_code)
%define g_dev_r4300_new_dynarec_hot_state_stop (g_dev + offsetof_struct_device_r4300 + offsetof_struct_r4300_core_new_dynarec_hot_state + offsetof_struct_new_dynarec_hot_state_stop)
%define g_dev_r4300_new_dynarec_hot_state_regs (g_dev + offsetof_struct_device_r4300 + offsetof_struct_r4300_core_new_dynarec_hot_state + offsetof_struct_new_dynarec_hot_state_regs)
%define g_dev_r4300_new_dynarec_hot_state_hi (g_dev + offsetof_struct_device_r4300 + offsetof_struct_r4300_core_new_dynarec_hot_state + offsetof_struct_new_dynarec_hot_state_hi)
%define g_dev_r4300_new_dynarec_hot_state_lo (g_dev + offsetof_struct_device_r4300 + offsetof_struct_r4300_core_new_dynarec_hot_state + offsetof_struct_new_dynarec_hot_state_lo)
%define g_dev_r4300_new_dynarec_hot_state_cp0_regs (g_dev + offsetof_struct_device_r4300 + offsetof_struct_r4300_core_new_dynarec_hot_state + offsetof_struct_new_dynarec_hot_state_cp0_regs)
%define g_dev_r4300_new_dynarec_hot_state_next_interrupt (g_dev + offsetof_struct_device_r4300 + offsetof_struct_r4300_core_new_dynarec_hot_state + offsetof_struct_new_dynarec_hot_state_next_interrupt)
%define g_dev_r4300_new_dynarec_hot_state_cycle_count (g_dev + offsetof_struct_device_r4300 + offsetof_struct_r4300_core_new_dynarec_hot_state + offsetof_struct_new_dynarec_hot_state_cycle_count)
%define g_dev_r4300_new_dynarec_hot_state_pending_exception (g_dev + offsetof_struct_device_r4300 + offsetof_struct_r4300_core_new_dynarec_hot_state + offsetof_struct_new_dynarec_hot_state_pending_exception)
%define g_dev_r4300_new_dynarec_hot_state_pcaddr (g_dev + offsetof_struct_device_r4300 + offsetof_struct_r4300_core_new_dynarec_hot_state + offsetof_struct_new_dynarec_hot_state_pcaddr)
%define g_dev_r4300_new_dynarec_hot_state_branch_target (g_dev + offsetof_struct_device_r4300 + offsetof_struct_r4300_core_new_dynarec_hot_state + offsetof_struct_new_dynarec_hot_state_branch_target)
%define g_dev_r4300_new_dynarec_hot_state_restore_candidate (g_dev + offsetof_struct_device_r4300 + offsetof_struct_r4300_core_new_dynarec_hot_state + offsetof_struct_new_dynarec_hot_state_restore_candidate)
%define g_dev_r4300_new_dynarec_hot_state_memory_map (g_dev + offsetof_struct_device_r4300 + offsetof_struct_r4300_core_new_dynarec_hot_state + offsetof_struct_new_dynarec_hot_state_memory_map)
;Labels defined in this file and exported to the linker.
cglobal jump_vaddr_eax
cglobal jump_vaddr_ecx
cglobal jump_vaddr_edx
cglobal jump_vaddr_ebx
cglobal jump_vaddr_ebp
cglobal jump_vaddr_esi
cglobal jump_vaddr_edi
cglobal verify_code_ds
cglobal verify_code_vm
cglobal verify_code
cglobal cc_interrupt
cglobal do_interrupt
cglobal fp_exception
cglobal fp_exception_ds
cglobal jump_syscall
cglobal jump_eret
cglobal new_dyna_start
cglobal invalidate_block_eax
cglobal invalidate_block_ecx
cglobal invalidate_block_edx
cglobal invalidate_block_ebx
cglobal invalidate_block_ebp
cglobal invalidate_block_esi
cglobal invalidate_block_edi
cglobal breakpoint
;Symbols defined elsewhere that this file calls or reads.
cextern base_addr
cextern new_recompile_block
cextern get_addr_ht
cextern get_addr
cextern dynarec_gen_interrupt
cextern clean_blocks
cextern invalidate_block
cextern new_dynarec_check_interrupt
cextern get_addr_32
cextern g_dev
;.bss and .rodata are declared but nothing is placed in them here;
;everything that follows is emitted into .text.
section .bss
align 4
section .rodata
section .text
;jump_vaddr_<reg>: one entry point per source register.
;Each entry copies the named register into ARG1_REG and continues at
;jump_vaddr, which passes it to get_addr_ht and jumps to the address
;returned in rax.
;The entry whose register is CCREG in the current convention (ebx without
;WIN64, esi with WIN64) is an `int 3` trap instead of a real entry.
;jump_vaddr_ecx is placed last so that it falls through into jump_vaddr.
jump_vaddr_eax:
mov ARG1_REG, eax
jmp jump_vaddr
jump_vaddr_edx:
mov ARG1_REG, edx
jmp jump_vaddr
jump_vaddr_ebx:
%ifdef WIN64
mov ARG1_REG, ebx
jmp jump_vaddr
%else
int 3
%endif
jump_vaddr_edi:
mov ARG1_REG, edi
jmp jump_vaddr
jump_vaddr_ebp:
mov ARG1_REG, ebp
jmp jump_vaddr
jump_vaddr_esi:
%ifdef WIN64
int 3
%else
mov ARG1_REG, esi
jmp jump_vaddr
%endif
jump_vaddr_ecx:
mov ARG1_REG, ecx
jump_vaddr:
;Save CCREG in cycle_count, store CCREG + next_interrupt to cp0_regs+36,
;then look up the target. CCREG is reloaded from cycle_count afterwards
;because it was modified by the add above.
mov DWORD[rel g_dev_r4300_new_dynarec_hot_state_cycle_count], CCREG
add CCREG, [rel g_dev_r4300_new_dynarec_hot_state_next_interrupt]
mov [rel g_dev_r4300_new_dynarec_hot_state_cp0_regs+36], CCREG
call get_addr_ht
mov CCREG, DWORD[rel g_dev_r4300_new_dynarec_hot_state_cycle_count]
jmp rax
;verify_code_ds / verify_code_vm / verify_code
;Compare `length` bytes at `source` against `copy`, 32 bits at a time.
;  all words equal -> `ret` to the caller
;  any difference  -> _D5: call get_addr(pcaddr), drop the caller's return
;                     address and jump to the address returned in rax
;verify_code_ds has no instructions of its own and is the same address as
;verify_code_vm.
verify_code_ds:
;ARG1_REG = source (virtual address)
;ARG2_REG64 = copy
;ARG3_REG = length
;ARG4_REG = pcaddr
verify_code_vm:
;ARG1_REG = source (virtual address)
;ARG2_REG64 = copy
;ARG3_REG = length
;ARG4_REG = pcaddr
;Signed compare: sources below 0C0000000h go straight to verify_code.
cmp ARG1_REG, 0C0000000h
jl verify_code
;CCREG is borrowed as the page index; its value is parked in cycle_count
;and restored before leaving this section (also on the _D4 path).
mov [rel g_dev_r4300_new_dynarec_hot_state_cycle_count], CCREG
mov CCREG, ARG1_REG
;r10d = address of the last byte; both values become page numbers (>> 12).
lea r10d, [-1+ARG1_REG64+ARG3_REG64*1]
shr CCREG, 12
shr r10d, 12
mov r11, g_dev_r4300_new_dynarec_hot_state_memory_map
;rax = memory_map entry of the first page; a negative entry is handled like
;a mismatch (_D4).
mov rax, [r11+CCREG64*8]
test rax, rax
js _D4
;source += entry*4: from here on ARG1_REG64 is used as the pointer that
;verify_code reads through.
lea ARG1_REG64, [ARG1_REG64+rax*4]
_D1:
;Walk the pages up to r10d. Each entry is XORed with the previous one
;(with itself on the first pass); `shl 2` discards the top two bits of the
;difference. Any remaining difference goes to _D4.
xor rax, [r11+CCREG64*8]
shl rax, 2
jne _D4
mov rax, [r11+CCREG64*8]
inc CCREG
cmp CCREG, r10d
jbe _D1
mov CCREG, [rel g_dev_r4300_new_dynarec_hot_state_cycle_count]
verify_code:
;ARG1_REG64 = source
;ARG2_REG64 = copy
;ARG3_REG = length
;ARG4_REG = pcaddr
;Check the last word first.
mov eax, [-4+ARG1_REG64+ARG3_REG64*1]
xor eax, [-4+ARG2_REG64+ARG3_REG64*1]
jne _D5
;length-4 == 0: that single word was everything.
mov eax, ARG3_REG
add ARG3_REG, -4
je _D3
;If bit 2 of the original length is clear, continue with the original
;length (the last word is compared once more); otherwise with length-4.
;Either way the loop below consumes 8 bytes per pass.
;NOTE(review): assumes length is a non-zero multiple of 4 -- confirm.
test eax, 4
cmove ARG3_REG, eax
_D2:
mov eax, [-4+ARG1_REG64+ARG3_REG64*1]
xor eax, [-4+ARG2_REG64+ARG3_REG64*1]
jne _D5
mov eax, [-8+ARG1_REG64+ARG3_REG64*1]
xor eax, [-8+ARG2_REG64+ARG3_REG64*1]
jne _D5
add ARG3_REG64, -8
jne _D2
_D3:
ret
_D4:
;Mismatch found while CCREG was borrowed: restore it first.
mov CCREG, [rel g_dev_r4300_new_dynarec_hot_state_cycle_count]
_D5:
;-8 before the call, +16 after it: the extra 8 bytes remove the return
;address of whoever called verify_code*, since control does not go back.
add rsp, -8
mov ARG1_REG, ARG4_REG
call get_addr
add rsp, 16
jmp rax
;cc_interrupt: entered with `call`; the return address is either used by the
;`ret` below or discarded at _E2/_E3.
; 1. cp0_regs+36 = CCREG + next_interrupt; pending_exception = 0.
; 2. (that sum >> 19) & 01fch is a byte offset selecting one 32-bit word of
;    restore_candidate. If the word is non-zero, _E4 first calls
;    clean_blocks once for every set bit and clears the word.
; 3. dynarec_gen_interrupt is called, then CCREG = cp0_regs+36 - next_interrupt.
; 4. stop != 0              -> _E3: remove the frame built by new_dyna_start,
;                              restore the registers it pushed and return to
;                              its caller
;    pending_exception != 0 -> _E2: continue at get_addr_ht(pcaddr)
;    otherwise              -> ret
;Stack: 56 bytes are reserved for the duration of the C calls. Bytes 0-31 are
;left untouched for the callee (Microsoft x64 home space); the dword at
;[rsp+32] is used by _E5 as a spill slot.
;Clobbers ebp on the _E4 path in addition to the volatile registers.
cc_interrupt:
add CCREG, DWORD[rel g_dev_r4300_new_dynarec_hot_state_next_interrupt]
add rsp, -56 ;Align stack
mov DWORD[rel g_dev_r4300_new_dynarec_hot_state_cp0_regs+36], CCREG
shr CCREG, 19
mov DWORD [rel g_dev_r4300_new_dynarec_hot_state_pending_exception], 0
and CCREG, 01fch
mov r10, g_dev_r4300_new_dynarec_hot_state_restore_candidate
cmp DWORD [r10+CCREG64], 0
jne _E4
_E1:
call dynarec_gen_interrupt
mov CCREG, DWORD[rel g_dev_r4300_new_dynarec_hot_state_cp0_regs+36]
mov eax, DWORD[rel g_dev_r4300_new_dynarec_hot_state_next_interrupt]
mov ecx, DWORD[rel g_dev_r4300_new_dynarec_hot_state_pending_exception]
mov edx, DWORD[rel g_dev_r4300_new_dynarec_hot_state_stop]
add rsp, 56
sub CCREG, eax
test edx, edx
jne _E3
test ecx, ecx
jne _E2
ret
_E2:
;-8 before the call, +16 after it: the extra 8 bytes drop our own return
;address because control continues at the looked-up address instead.
add rsp, -8
mov ARG1_REG, DWORD[rel g_dev_r4300_new_dynarec_hot_state_pcaddr]
mov DWORD[rel g_dev_r4300_new_dynarec_hot_state_cycle_count], CCREG
call get_addr_ht
mov CCREG, DWORD[rel g_dev_r4300_new_dynarec_hot_state_cycle_count]
add rsp, 16
jmp rax
_E3:
;8 bytes of return address + the 56 bytes reserved by new_dyna_start
add rsp, 64 ;pop return address
;restore callee-save registers
pop rbp
pop r15
pop r14
pop r13
pop r12
pop rbx
%ifdef WIN64
pop rsi
pop rdi
%endif
ret ;exit dynarec
_E4:
;Move 'dirty' blocks to the 'clean' list
;edx = bit mask taken from the selected word, which is then cleared.
;CCREG*8 + ebp (ebp = 0..31) is the argument passed to clean_blocks.
mov edx, DWORD[r10+CCREG64]
mov DWORD [r10+CCREG64], 0
shl CCREG, 3
mov ebp, 0
_E5:
shr edx, 1
jnc _E6
mov ARG1_REG, CCREG
add ARG1_REG, ebp
;edx is a volatile register in both calling conventions, so the remaining
;mask has to be kept in memory across the call. CCREG and ebp are
;callee-saved and survive on their own.
mov DWORD [rsp+32], edx
call clean_blocks
mov edx, DWORD [rsp+32]
_E6:
inc ebp
test ebp, 31
jne _E5
jmp _E1
;do_interrupt: look up the code for pcaddr with get_addr_ht, set
;CCREG = cp0_regs+36 - next_interrupt, and jump to the address returned in rax.
do_interrupt:
mov ARG1_REG, [rel g_dev_r4300_new_dynarec_hot_state_pcaddr]
call get_addr_ht
mov CCREG, [rel g_dev_r4300_new_dynarec_hot_state_cp0_regs+36]
mov edx, [rel g_dev_r4300_new_dynarec_hot_state_next_interrupt]
sub CCREG, edx
jmp rax
;fp_exception / fp_exception_ds / jump_syscall
;Each entry loads a different value for Cause into edx and shares _E7:
;  fp_exception     01000002ch
;  fp_exception_ds  09000002ch (same value with the top bit set)
;  jump_syscall     020h
;_E7 sets bit 1 of Status, stores edx to Cause and eax to EPC, then jumps to
;the address get_addr_ht returns for 080000180h.
fp_exception:
mov edx, 01000002ch
_E7:
;EAX = pcaddr
mov ecx, [rel g_dev_r4300_new_dynarec_hot_state_cp0_regs+48]
or ecx, 2
mov [rel g_dev_r4300_new_dynarec_hot_state_cp0_regs+48], ecx ;Status
mov [rel g_dev_r4300_new_dynarec_hot_state_cp0_regs+52], edx ;Cause
mov [rel g_dev_r4300_new_dynarec_hot_state_cp0_regs+56], eax ;EPC
mov ARG1_REG, 080000180h
call get_addr_ht
jmp rax
fp_exception_ds:
mov edx, 09000002ch ;Set high bit if delay slot
jmp _E7
jump_syscall:
mov edx, 020h
jmp _E7
;jump_eret
; 1. Clears bit 1 of Status and stores CCREG + next_interrupt to Count.
; 2. Calls new_dynarec_check_interrupt, then recomputes
;    CCREG = Count - next_interrupt and saves it in cycle_count.
; 3. If that difference is not negative (_E11), pcaddr = EPC and cc_interrupt
;    is called first; EPC is re-read from pcaddr afterwards.
; 4. _E8.._E10 build a 32-bit mask in r10d. A bit is 1 when the upper 32 bits
;    of a 64-bit value differ from the sign extension of its lower 32 bits:
;      bits 31..1 <- regs at byte offsets 248, 240, ..., 8
;      bit 0      <- hi, or lo when hi passes the check
; 5. Jumps to the address returned by get_addr_32(EPC, mask).
jump_eret:
mov ecx, [rel g_dev_r4300_new_dynarec_hot_state_cp0_regs+48] ;Status
add CCREG, [rel g_dev_r4300_new_dynarec_hot_state_next_interrupt]
and ecx, 0FFFFFFFDh
mov [rel g_dev_r4300_new_dynarec_hot_state_cp0_regs+36], CCREG ;Count
mov [rel g_dev_r4300_new_dynarec_hot_state_cp0_regs+48], ecx ;Status
call new_dynarec_check_interrupt
mov eax, [rel g_dev_r4300_new_dynarec_hot_state_next_interrupt]
mov CCREG, [rel g_dev_r4300_new_dynarec_hot_state_cp0_regs+36]
sub CCREG, eax
mov [rel g_dev_r4300_new_dynarec_hot_state_cycle_count], CCREG
mov eax, [rel g_dev_r4300_new_dynarec_hot_state_cp0_regs+56] ;EPC
;mov does not touch the flags: jns still tests the result of the sub above.
jns _E11
_E8:
mov r9, 248
xor r10d, r10d
_E9:
mov r8, g_dev_r4300_new_dynarec_hot_state_regs
mov ecx, [r8+r9]
mov edx, [r8+r9+4]
;edx = high word XOR sign of low word; zero when properly sign-extended.
sar ecx, 31
xor edx, ecx
;neg sets the carry flag exactly when edx was non-zero;
;adc r10d, r10d shifts the mask left and appends that carry.
neg edx
adc r10d, r10d
sub r9, 8
jne _E9
;r9 is 0 from here on, so [r8+r9] addresses hi / lo themselves.
mov r8, g_dev_r4300_new_dynarec_hot_state_hi
mov ecx, [r8+r9]
mov edx, [r8+r9+4]
sar ecx, 31
xor edx, ecx
jne _E10
mov r8, g_dev_r4300_new_dynarec_hot_state_lo
mov ecx, [r8+r9]
mov edx, [r8+r9+4]
sar ecx, 31
xor edx, ecx
_E10:
;Append the hi/lo bit to the mask exactly like the loop does, and only then
;copy the finished mask into the second argument register. Adding into
;ARG2_REG directly would mix in whatever that register happened to hold.
neg edx
adc r10d, r10d
mov ARG2_REG, r10d
mov ARG1_REG, eax
call get_addr_32
mov CCREG, [rel g_dev_r4300_new_dynarec_hot_state_cycle_count]
jmp rax
_E11:
mov [rel g_dev_r4300_new_dynarec_hot_state_pcaddr], eax
call cc_interrupt
mov eax, [rel g_dev_r4300_new_dynarec_hot_state_pcaddr]
jmp _E8
;new_dyna_start: entry into the recompiled code.
;Pushes the callee-saved registers (6 of them, or 8 with WIN64), reserves a
;further 56 bytes of stack, calls new_recompile_block(0a4000040h), sets
;CCREG = cp0_regs+36 - next_interrupt and jumps to the address stored in
;base_addr. There is no matching epilogue here: the frame is removed and the
;registers are popped at _E3 in cc_interrupt.
new_dyna_start:
;we must push an even # of registers to keep stack 16-byte aligned
%ifdef WIN64
push rdi
push rsi
%endif
push rbx
push r12
push r13
push r14
push r15
push rbp
add rsp, -56
mov ARG1_REG, 0a4000040h
call new_recompile_block
mov eax, DWORD [rel g_dev_r4300_new_dynarec_hot_state_next_interrupt]
mov CCREG, DWORD [rel g_dev_r4300_new_dynarec_hot_state_cp0_regs+36]
sub CCREG, eax
mov rax, QWORD[rel base_addr]
jmp rax
;invalidate_block_<reg>: one entry point per source register.
;Each entry copies the named register into ARG1_REG and continues at
;invalidate_block_call, which calls invalidate_block and returns to the
;caller. invalidate_block_ecx is placed last so that it falls through.
invalidate_block_eax:
mov ARG1_REG, eax
jmp invalidate_block_call
invalidate_block_edi:
mov ARG1_REG, edi
jmp invalidate_block_call
invalidate_block_edx:
mov ARG1_REG, edx
jmp invalidate_block_call
invalidate_block_ebx:
mov ARG1_REG, ebx
jmp invalidate_block_call
invalidate_block_ebp:
mov ARG1_REG, ebp
jmp invalidate_block_call
invalidate_block_esi:
mov ARG1_REG, esi
jmp invalidate_block_call
invalidate_block_ecx:
mov ARG1_REG, ecx
invalidate_block_call:
;Reserve 40 bytes instead of 8: under the Microsoft x64 convention the callee
;owns the 32 bytes above its own return address, and with only 8 bytes
;reserved that area would include the return address used by `ret` below.
;40 is congruent to 8 modulo 16, so stack alignment at the call is unchanged.
add rsp, -40
call invalidate_block
add rsp, 40
ret
;breakpoint: raise a breakpoint trap (int 3) and return to the caller.
breakpoint:
int 3
ret

View File

@ -666,7 +666,7 @@ static void alloc_x86_reg(struct regstat *cur,int i,signed char reg,int hr)
// Alloc cycle count into dedicated register
static void alloc_cc(struct regstat *cur,int i)
{
alloc_x86_reg(cur,i,CCREG,ESI);
alloc_x86_reg(cur,i,CCREG,HOST_CCREG);
}
/* Special alloc */
@ -3032,6 +3032,7 @@ static int do_dirty_stub(int i)
static void do_dirty_stub_ds(void)
{
assert((int)start>=(int)0xC0000000);
emit_pushimm(start+1);
emit_movimm((int)start<(int)0xC0000000?(int)source:(int)start,EAX);
emit_movimm((int)copy,EDX);