Mirror of https://github.com/ptitSeb/box86.git (synced 2024-11-27 00:40:24 +00:00)

[DYNAREC] Fixed CALLRET and backported many BIBLOCK improvments from box64

This commit is contained in:
parent 0829c14a73
commit b145080d37
@@ -430,6 +430,8 @@ Op is 20-27

// blx reg
#define BLX(reg) EMIT(0xe12fff30 | (reg) )
// blx cond reg
#define BLXcond(C, reg) EMIT(C | 0x012fff30 | (reg) )

// b cond offset
#define Bcond(C, O) EMIT(C | (0b101<<25) | (0<<24) | (((O)>>2)&0xffffff))
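For reference, here is a standalone sketch of how an encoding like Bcond above packs a PC-relative offset into a 32-bit ARM branch word. The helper name and the sample values are illustrative only, not part of the box86 sources, and the PC+8 pipeline bias is assumed to be handled by the caller, as it is for the macro.

#include <stdint.h>
#include <stdio.h>

/* Illustrative only: build "B<cond> <offset>" the same way the Bcond macro
 * does: condition bits, the 0x5 (0b101) opcode field, L=0 (no link), and the
 * signed byte offset converted to a word offset in the low 24 bits. */
static uint32_t encode_bcond(uint32_t cond, int32_t byte_offset)
{
    return cond | (0x5u << 25) | (0u << 24) | (((uint32_t)byte_offset >> 2) & 0xffffff);
}

int main(void)
{
    const uint32_t cEQ = 0x00000000u;            /* condition field for EQ (bits 31..28) */
    printf("B.EQ +16 -> %08x\n", encode_bcond(cEQ, 16));
    printf("B.EQ -8  -> %08x\n", encode_bcond(cEQ, -8));
    return 0;
}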
@@ -12,7 +12,7 @@ arm_epilog:
stm r0, {r4-r12,r14} // put back reg value in emu, including EIP (so r14 must be Flags now)
// restore stack pointer
ldr sp, [r0, #(8*4+2*4)]
ldr r5, [sp, #-4]
pop {r4, r5}
str r5, [r0, #(8*4+2*4)] // put back old value
//restore all used register
vpop {d8-d15}

@@ -25,7 +25,7 @@ arm_epilog:
arm_epilog_fast:
// restore stack pointer
ldr sp, [r0, #(8*4+2*4)]
ldr r5, [sp, #-4]
pop {r4, r5}
str r5, [r0, #(8*4+2*4)] // put back old value
//restore all used register
vpop {d8-d15}

@@ -13,9 +13,9 @@ arm_prolog:
vpush {d8-d15}
// save Sp and setup stack for optionnal callret
ldr r5, [r0, #(8*4+2*4)] // grab old value of xSPSave
str sp, [r0, #(8*4+2*4)]
mov r4, #0
push {r4-r5}
str sp, [r0, #(8*4+2*4)]
//setup emu -> register
ldm r0, {r4-r12} // all 8 register in direct access, plus flags, no EIP (so r14 can be used as scratch)
//jump to function
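To make the callret bookkeeping above easier to follow, here is a minimal C model of what the prolog and epilog do around xSPSave: the prolog reads the old value, pushes a 0 marker together with that old value, and records the new stack pointer; the epilog reverses it. The struct, the array standing in for the native stack, and the function names are invented for illustration only.

#include <stdint.h>
#include <stdio.h>

typedef struct { uintptr_t *xSPSave; } emu_t;   /* stands in for [r0, #(8*4+2*4)] */

static uintptr_t stack_mem[64];
static uintptr_t *sp = &stack_mem[64];          /* native stack, grows downward */

static void prolog(emu_t *emu)
{
    uintptr_t *old = emu->xSPSave;              /* ldr r5, [r0, #...]           */
    *--sp = (uintptr_t)old;                     /* push {r4-r5}: r5 = old value */
    *--sp = 0;                                  /*               r4 = 0 marker  */
    emu->xSPSave = sp;                          /* str sp, [r0, #...]           */
}

static void epilog(emu_t *emu)
{
    sp = emu->xSPSave;                          /* ldr sp, [r0, #...]           */
    uintptr_t marker = *sp++;                   /* pop {r4, r5}                 */
    uintptr_t *old = (uintptr_t *)*sp++;
    (void)marker;
    emu->xSPSave = old;                         /* str r5, [r0, #...] (put back old value) */
}

int main(void)
{
    emu_t emu = { 0 };
    prolog(&emu);
    epilog(&emu);
    printf("xSPSave restored to NULL: %s\n", emu.xSPSave == NULL ? "yes" : "no");
    return 0;
}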
@@ -73,8 +73,7 @@ void add_next(dynarec_arm_t *dyn, uintptr_t addr) {
}
// add slots
if(dyn->next_sz == dyn->next_cap) {
dyn->next_cap += 64;
dyn->next = (uintptr_t*)dynaRealloc(dyn->next, dyn->next_cap*sizeof(uintptr_t));
printf_log(LOG_NONE, "Warning, overallocating next\n");
}
dyn->next[dyn->next_sz++] = addr;
}

@@ -98,8 +97,7 @@ uintptr_t get_closest_next(dynarec_arm_t *dyn, uintptr_t addr) {
void add_jump(dynarec_arm_t *dyn, int ninst) {
// add slots
if(dyn->jmp_sz == dyn->jmp_cap) {
dyn->jmp_cap += 64;
dyn->jmps = (int*)dynaRealloc(dyn->jmps, dyn->jmp_cap*sizeof(int));
printf_log(LOG_NONE, "Warning, overallocating jmps\n");
}
dyn->jmps[dyn->jmp_sz++] = ninst;
}
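Both hunks drop the grow-by-64-and-realloc path: the arrays are now preallocated at MAX_INSTS entries (see the static_jmps/static_next buffers further down), so running out of room is treated as a should-not-happen condition that only gets a warning. A generic sketch of a bounded append with such a guard, purely for illustration; the names are made up and, unlike this sketch, the real code's capacity is sized so the warning should never fire.

#include <stdio.h>

static int items[64];
static int items_sz = 0;

/* Append into a fixed-capacity array and warn instead of growing it. */
static void append_bounded(int value)
{
    if (items_sz == (int)(sizeof(items)/sizeof(items[0]))) {
        fprintf(stderr, "Warning, overallocating items\n");
        return;                      /* drop instead of writing out of bounds */
    }
    items[items_sz++] = value;
}

int main(void)
{
    for (int i = 0; i < 70; ++i)     /* the last few appends trigger the warning */
        append_bounded(i);
    printf("stored %d items\n", items_sz);
    return 0;
}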
@@ -282,33 +280,47 @@ void addInst(instsize_t* insts, size_t* size, int x86_size, int native_size)
}
}

static void recurse_mark_alive(dynarec_arm_t* dyn, int i)
{
if(dyn->insts[i].x86.alive)
return;
dyn->insts[i].x86.alive = 1;
if(dyn->insts[i].x86.jmp && dyn->insts[i].x86.jmp_insts!=-1)
recurse_mark_alive(dyn, dyn->insts[i].x86.jmp_insts);
if(i<dyn->size-1 && dyn->insts[i].x86.has_next)
recurse_mark_alive(dyn, i+1);
}

static void fillPredecessors(dynarec_arm_t* dyn)
static int sizePredecessors(dynarec_arm_t* dyn)
{
int pred_sz = 1; // to be safe
// compute total size of predecessor to alocate the array
// compute total size of predecessor to allocate the array
// mark alive...
recurse_mark_alive(dyn, 0);
// first compute the jumps
int jmpto;
for(int i=0; i<dyn->size; ++i) {
if(dyn->insts[i].x86.jmp && dyn->insts[i].x86.jmp_insts!=-1) {
++pred_sz;
++dyn->insts[dyn->insts[i].x86.jmp_insts].pred_sz;
if(dyn->insts[i].x86.alive && dyn->insts[i].x86.jmp && ((jmpto=dyn->insts[i].x86.jmp_insts)!=-1)) {
pred_sz++;
dyn->insts[jmpto].pred_sz++;
}
}
// remove "has_next" from orphean branch
// remove "has_next" from orphan branch
for(int i=0; i<dyn->size-1; ++i) {
if(!dyn->insts[i].x86.has_next) {
if(dyn->insts[i+1].x86.has_next && !dyn->insts[i+1].pred_sz)
dyn->insts[i+1].x86.has_next = 0;
}
if(dyn->insts[i].x86.has_next && !dyn->insts[i+1].x86.alive)
dyn->insts[i].x86.has_next = 0;
}
// second the "has_next"
for(int i=0; i<dyn->size-1; ++i) {
if(dyn->insts[i].x86.has_next) {
++pred_sz;
++dyn->insts[i+1].pred_sz;
pred_sz++;
dyn->insts[i+1].pred_sz++;
}
}
dyn->predecessor = (int*)dynaMalloc(pred_sz*sizeof(int));
return pred_sz;
}
static void fillPredecessors(dynarec_arm_t* dyn)
{
// fill pred pointer
int* p = dyn->predecessor;
for(int i=0; i<dyn->size; ++i) {

@@ -317,7 +329,7 @@ static void fillPredecessors(dynarec_arm_t* dyn)
dyn->insts[i].pred_sz=0; // reset size, it's reused to actually fill pred[]
}
// fill pred
for(int i=0; i<dyn->size; ++i) {
for(int i=0; i<dyn->size; ++i) if(dyn->insts[i].x86.alive) {
if((i!=dyn->size-1) && dyn->insts[i].x86.has_next)
dyn->insts[i+1].pred[dyn->insts[i+1].pred_sz++] = i;
if(dyn->insts[i].x86.jmp && (dyn->insts[i].x86.jmp_insts!=-1)) {
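A self-contained sketch of the mark-alive-then-count-predecessors idea introduced here, run on a toy instruction array; the field names only loosely mirror the box86 ones and the example control flow is made up.

#include <stdio.h>

/* Toy instruction record: jmp_insts is the index a jump targets (-1 if none),
 * has_next says execution can fall through to i+1. */
typedef struct { int jmp_insts, has_next, alive, pred_sz; } toy_inst_t;

static void mark_alive(toy_inst_t *insts, int size, int i)
{
    if (i < 0 || i >= size || insts[i].alive)
        return;
    insts[i].alive = 1;
    if (insts[i].jmp_insts != -1)
        mark_alive(insts, size, insts[i].jmp_insts);   /* follow the jump edge */
    if (insts[i].has_next && i < size - 1)
        mark_alive(insts, size, i + 1);                /* follow the fall-through edge */
}

int main(void)
{
    /* 0 jumps to 3; 1 and 2 are dead code; 3 falls through to 4. */
    toy_inst_t insts[5] = {
        { 3, 0, 0, 0 }, { -1, 1, 0, 0 }, { -1, 1, 0, 0 }, { -1, 1, 0, 0 }, { -1, 0, 0, 0 },
    };
    mark_alive(insts, 5, 0);
    /* Count predecessors only along edges that start from alive instructions,
     * which is what sizePredecessors() does after marking. */
    for (int i = 0; i < 5; ++i) {
        if (!insts[i].alive) continue;
        if (insts[i].jmp_insts != -1) insts[insts[i].jmp_insts].pred_sz++;
        if (insts[i].has_next && i < 4) insts[i + 1].pred_sz++;
    }
    for (int i = 0; i < 5; ++i)
        printf("inst %d: alive=%d preds=%d\n", i, insts[i].alive, insts[i].pred_sz);
    return 0;
}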
@@ -371,22 +383,23 @@ static int updateNeed(dynarec_arm_t* dyn, int ninst, uint8_t need) {
}

void* current_helper = NULL;
static int static_jmps[MAX_INSTS+2];
static uintptr_t static_next[MAX_INSTS+2];
static instruction_arm_t static_insts[MAX_INSTS+2] = {0};
// TODO: ninst could be a uint16_t instead of an int, that could same some temp. memory

void CancelBlock(int need_lock)
{
if(need_lock)
mutex_lock(&my_context->mutex_dyndump);
dynarec_arm_t* helper = (dynarec_arm_t*)current_helper;
current_helper = NULL;
if(helper) {
dynaFree(helper->next);
dynaFree(helper->insts);
dynaFree(helper->predecessor);
if(helper->dynablock && helper->dynablock->actual_block) {
FreeDynarecMap((uintptr_t)helper->dynablock->actual_block);
helper->dynablock->actual_block = NULL;
}
}
current_helper = NULL;
if(need_lock)
mutex_unlock(&my_context->mutex_dyndump);
}
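The static_insts/static_jmps/static_next arrays above suggest the per-block scratch storage is now a fixed MAX_INSTS-sized pool reused for every block instead of being heap-allocated each time. A generic sketch of that pattern with invented names; the real code serializes block creation with mutex_dyndump, which plays the role of the mutex here.

#include <pthread.h>
#include <string.h>

#define MAX_ITEMS 32760                  /* mirrors the MAX_INSTS idea */

static int scratch[MAX_ITEMS + 2];       /* reused for every build */
static pthread_mutex_t build_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Build one block using the shared scratch buffer; the mutex guarantees only
 * one build touches the static storage at a time. */
static int build_block(const int *input, int n)
{
    if (n > MAX_ITEMS)
        return -1;                       /* caller must fall back or abort */
    pthread_mutex_lock(&build_mutex);
    memcpy(scratch, input, n * sizeof(int));
    /* ... work on scratch[0..n-1] ... */
    pthread_mutex_unlock(&build_mutex);
    return 0;
}

int main(void)
{
    int data[4] = { 1, 2, 3, 4 };
    return build_block(data, 4);
}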
@@ -451,10 +464,19 @@ dynarec_log(LOG_DEBUG, "Asked to Fill block %p with %p\n", block, (void*)addr);
helper.dynablock = block;
helper.start = addr;
uintptr_t start = addr;
helper.cap = 64; // needs epilog handling
helper.insts = (instruction_arm_t*)dynaCalloc(helper.cap, sizeof(instruction_arm_t));
helper.cap = MAX_INSTS;
helper.insts = static_insts;
helper.jmps = static_jmps;
helper.jmp_cap = MAX_INSTS;
helper.next = static_next;
helper.next_cap = MAX_INSTS;
// pass 0, addresses, x86 jump addresses, overall size of the block
uintptr_t end = arm_pass0(&helper, addr);
if(helper.abort) {
if(box86_dynarec_dump || box86_dynarec_log)dynarec_log(LOG_NONE, "Abort dynablock on pass0\n");
CancelBlock(0);
return NULL;
}
// basic checks
if(!helper.size) {
dynarec_log(LOG_DEBUG, "Warning, null-sized dynarec block (%p)\n", (void*)addr);
@@ -519,24 +541,42 @@ dynarec_log(LOG_DEBUG, "Asked to Fill block %p with %p\n", block, (void*)addr);
}
}
// no need for next and jmps anymore
dynaFree(helper.next);
helper.next_sz = helper.next_cap = 0;
helper.next = NULL;
dynaFree(helper.jmps);
helper.jmp_sz = helper.jmp_cap = 0;
helper.jmps = NULL;
// fill predecessors with the jump address
int alloc_size = sizePredecessors(&helper);
helper.predecessor = (int*)alloca(alloc_size*sizeof(int));
fillPredecessors(&helper);

int pos = helper.size;
while (pos>=0)
pos = updateNeed(&helper, pos, 0);
// remove fpu stuff on non-executed code
for(int i=1; i<helper.size-1; ++i)
if(!helper.insts[i].pred_sz) {
int ii = i;
while(ii<helper.size && !helper.insts[ii].pred_sz)
fpu_reset_ninst(&helper, ii++);
i = ii;
}

// pass 1, float optimisations, first pass for flags
arm_pass1(&helper, addr);
if(helper.abort) {
if(box86_dynarec_dump || box86_dynarec_log)dynarec_log(LOG_NONE, "Abort dynablock on pass0\n");
CancelBlock(0);
return NULL;
}

// pass 2, instruction size
arm_pass2(&helper, addr);
if(helper.abort) {
if(box86_dynarec_dump || box86_dynarec_log)dynarec_log(LOG_NONE, "Abort dynablock on pass0\n");
CancelBlock(0);
return NULL;
}
// ok, now allocate mapped memory, with executable flag on
size_t insts_rsize = (helper.insts_size+2)*sizeof(instsize_t);
insts_rsize = (insts_rsize+7)&~7; // round the size...
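In the hunk above, sizePredecessors() now returns the element count so the predecessor array can live on the stack via alloca instead of the heap. A minimal sketch of that size-then-allocate pattern on made-up data; it assumes a glibc-style alloca.h and, as in the dynarec, relies on the size being bounded (here trivially, there by MAX_INSTS).

#include <alloca.h>
#include <stdio.h>

/* Count how many predecessor slots a toy edge list needs. */
static int count_edges(const int *targets, int n)
{
    int total = 1;                       /* +1 "to be safe", as in sizePredecessors() */
    for (int i = 0; i < n; ++i)
        if (targets[i] != -1)
            ++total;
    return total;
}

int main(void)
{
    int targets[4] = { 2, -1, 3, -1 };
    int total = count_edges(targets, 4);
    int *pred = (int *)alloca(total * sizeof(int));   /* released automatically on return */
    int used = 0;
    for (int i = 0; i < 4; ++i)
        if (targets[i] != -1)
            pred[used++] = i;
    printf("%d slots, %d used\n", total, used);
    return 0;
}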
@@ -570,13 +610,16 @@ dynarec_log(LOG_DEBUG, "Asked to Fill block %p with %p\n", block, (void*)addr);
helper.arm_size = 0;
helper.insts_size = 0; // reset
arm_pass3(&helper, addr);
if(helper.abort) {
if(box86_dynarec_dump || box86_dynarec_log)dynarec_log(LOG_NONE, "Abort dynablock on pass0\n");
CancelBlock(0);
return NULL;
}
// keep size of instructions for signal handling
block->instsize = instsize;
// ok, free the helper now
dynaFree(helper.insts);
helper.insts = NULL;
helper.instsize = NULL;
dynaFree(helper.predecessor);
helper.predecessor = NULL;
block->size = sz;
block->isize = helper.size;

@@ -612,6 +655,8 @@ dynarec_log(LOG_DEBUG, "Asked to Fill block %p with %p\n", block, (void*)addr);
CancelBlock(0);
return NULL;
}
// ok, free the helper now
helper.insts = NULL;
if(insts_rsize/sizeof(instsize_t)<helper.insts_size) {
printf_log(LOG_NONE, "BOX86: Warning, ists_size difference in block between pass2 (%zu) and pass3 (%zu), allocated: %zu\n", oldinstsize, helper.insts_size, insts_rsize/sizeof(instsize_t));
}
@@ -119,7 +119,7 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
VCMP_F64(v1, v2);
}
FCOM(x1, x2);
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
case 0xE0:
case 0xE1:

@@ -297,7 +297,7 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
VCMP_F64(v1, d1);
}
FCOM(x1, x2);
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
case 4:
INST_NAME("FSUB ST0, float[ED]");
@@ -67,7 +67,7 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
case 0xC6:
case 0xC7:
INST_NAME("FLD STx");
v2 = x87_do_push(dyn, ninst, x3, X87_ST(nextop&7));
X87_PUSH_OR_FAIL(v2, dyn, ninst, x3, X87_ST(nextop&7));
v1 = x87_get_st(dyn, ninst, x1, x2, (nextop&7)+1, X87_COMBINE(0, (nextop&7)+1));
if(ST_IS_F(0)) {
VMOV_32(v2, v1);

@@ -198,7 +198,7 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,

case 0xE8:
INST_NAME("FLD1");
v1 = x87_do_push(dyn, ninst, x1, box86_dynarec_x87double?NEON_CACHE_ST_D:NEON_CACHE_ST_F);
X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, box86_dynarec_x87double?NEON_CACHE_ST_D:NEON_CACHE_ST_F);
if(ST_IS_F(0)) {
VMOV_i_32(v1, 0b01110000);
} else {

@@ -208,7 +208,7 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
break;
case 0xE9:
INST_NAME("FLDL2T");
v1 = x87_do_push(dyn, ninst, x1, box86_dynarec_x87double?NEON_CACHE_ST_D:NEON_CACHE_ST_F);
X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, box86_dynarec_x87double?NEON_CACHE_ST_D:NEON_CACHE_ST_F);
if(ST_IS_F(0)) {
MOV32(x2, (&f_l2t));
VLDR_32(v1, x2, 0);

@@ -220,7 +220,7 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
break;
case 0xEA:
INST_NAME("FLDL2E");
v1 = x87_do_push(dyn, ninst, x1, box86_dynarec_x87double?NEON_CACHE_ST_D:NEON_CACHE_ST_F);
X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, box86_dynarec_x87double?NEON_CACHE_ST_D:NEON_CACHE_ST_F);
if(ST_IS_F(0)) {
MOV32(x2, (&f_l2e));
VLDR_32(v1, x2, 0);

@@ -232,7 +232,7 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
break;
case 0xEB:
INST_NAME("FLDPI");
v1 = x87_do_push(dyn, ninst, x1, box86_dynarec_x87double?NEON_CACHE_ST_D:NEON_CACHE_ST_F);
X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, box86_dynarec_x87double?NEON_CACHE_ST_D:NEON_CACHE_ST_F);
if(ST_IS_F(0)) {
MOV32(x2, (&f_pi));
VLDR_32(v1, x2, 0);

@@ -244,7 +244,7 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
break;
case 0xEC:
INST_NAME("FLDLG2");
v1 = x87_do_push(dyn, ninst, x1, box86_dynarec_x87double?NEON_CACHE_ST_D:NEON_CACHE_ST_F);
X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, box86_dynarec_x87double?NEON_CACHE_ST_D:NEON_CACHE_ST_F);
if(ST_IS_F(0)) {
MOV32(x2, (&f_lg2));
VLDR_32(v1, x2, 0);

@@ -256,7 +256,7 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
break;
case 0xED:
INST_NAME("FLDLN2");
v1 = x87_do_push(dyn, ninst, x1, box86_dynarec_x87double?NEON_CACHE_ST_D:NEON_CACHE_ST_F);
X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, box86_dynarec_x87double?NEON_CACHE_ST_D:NEON_CACHE_ST_F);
if(ST_IS_F(0)) {
MOV32(x2, (&f_ln2));
VLDR_32(v1, x2, 0);

@@ -268,7 +268,7 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
break;
case 0xEE:
INST_NAME("FLDZ");
v1 = x87_do_push(dyn, ninst, x1, box86_dynarec_x87double?NEON_CACHE_ST_D:NEON_CACHE_ST_F);
X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, box86_dynarec_x87double?NEON_CACHE_ST_D:NEON_CACHE_ST_F);
if(ST_IS_F(0)) {
VMOV_8(v1/2, 0); // float is *2...
} else {
@@ -311,12 +311,12 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
VMOV_64(0, v1); // prepare call to log2
CALL_1D(log2, 0);
VMUL_F64(v2, v2, 0); //ST(1).d = log2(ST0.d)*ST(1).d
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
// should set C1 to 0
break;
case 0xF2:
INST_NAME("FPTAN");
v2 = x87_do_push(dyn, ninst, x1, box86_dynarec_x87double?NEON_CACHE_ST_D:NEON_CACHE_ST_F);
X87_PUSH_OR_FAIL(v2, dyn, ninst, x1, box86_dynarec_x87double?NEON_CACHE_ST_D:NEON_CACHE_ST_F);
v1 = x87_get_st(dyn, ninst, x1, x2, 1, NEON_CACHE_ST_D);
// seems that tan of glib doesn't follow the rounding direction mode
if(!box86_dynarec_fastround)

@@ -358,7 +358,7 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
VMOV_64(v2, 0); //ST(1).d = atan2(ST1.d, ST0.d);
if(!box86_dynarec_fastround)
x87_restoreround(dyn, ninst, u8);
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
// should set C1 to 0
break;
case 0xF4:

@@ -473,7 +473,7 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
MOV32(x2, (&d_ln2));
VLDR_64(0, x2, 0);
VDIV_F64(v2, v2, 0);
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
// should set C1 to 0
break;
case 0xFA:

@@ -492,7 +492,7 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
break;
case 0xFB:
INST_NAME("FSINCOS");
v2 = x87_do_push(dyn, ninst, x3, NEON_CACHE_ST_D);
X87_PUSH_OR_FAIL(v2, dyn, ninst, x3, NEON_CACHE_ST_D);
v1 = x87_get_st(dyn, ninst, x1, x2, 1, NEON_CACHE_ST_D);
// seems that sin and cos function of glibc don't follow the rounding mode
if(!box86_dynarec_fastround)

@@ -612,7 +612,7 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
switch((nextop>>3)&7) {
case 0:
INST_NAME("FLD ST0, float[ED]");
v1 = x87_do_push(dyn, ninst, x1, box86_dynarec_x87double?NEON_CACHE_ST_D:NEON_CACHE_ST_F);
X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, box86_dynarec_x87double?NEON_CACHE_ST_D:NEON_CACHE_ST_F);
if(ST_IS_F(0))
s0 = v1;
else

@@ -676,7 +676,7 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
VMOVfrV(x2, s0);
STR_IMM9(x2, ed, fixedaddress);
}
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
case 4:
INST_NAME("FLDENV Ed");
@@ -129,8 +129,8 @@ uintptr_t dynarecDA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
VCMP_F64(v1, v2);
}
FCOM(x1, x2);
x87_do_pop(dyn, ninst, x3);
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;

case 0xE4:

@@ -197,7 +197,7 @@ uintptr_t dynarecDA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
VCVT_F64_S32(d0, s0);
VCMP_F64(v1, d0);
FCOM(x1, x2);
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
case 4:
INST_NAME("FISUB ST0, Ed");
@@ -183,7 +183,7 @@ uintptr_t dynarecDB(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
switch((nextop>>3)&7) {
case 0:
INST_NAME("FILD ST0, Ed");
v1 = x87_do_push(dyn, ninst, x1, NEON_CACHE_ST_D);
X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, NEON_CACHE_ST_D);
s0 = fpu_get_scratch_single(dyn);
parity = getedparity(dyn, ninst, addr, nextop, 2);
if(parity) {

@@ -218,7 +218,7 @@ uintptr_t dynarecDB(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
MOV_IMM_COND(cNE, ed, 0b10, 1); // 0x80000000
WBACK;
VMSR(x14); // put back values
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
case 2:
INST_NAME("FIST Ed, ST0");

@@ -260,7 +260,7 @@ uintptr_t dynarecDB(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
TSTS_IMM8_ROR(x3, 0b00000001, 0);
MOV_IMM_COND(cNE, ed, 0b10, 1); // 0x80000000
WBACK;
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
x87_restoreround(dyn, ninst, u8);
break;
case 5:

@@ -281,7 +281,7 @@ uintptr_t dynarecDB(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
STRH_IMM8(x14, ed, 8);
} else {
if(box86_x87_no80bits) {
v1 = x87_do_push(dyn, ninst, x1, NEON_CACHE_ST_D);
X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, NEON_CACHE_ST_D);
parity = getedparity(dyn, ninst, addr, nextop, 3);
if (parity) {
addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 1023, 3, 0, NULL);

@@ -298,10 +298,10 @@ uintptr_t dynarecDB(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
if(ed!=x1) {
MOV_REG(x1, ed);
}
x87_do_push_empty(dyn, ninst, x3);
X87_PUSH_OR_FAIL_empty( , dyn, ninst, x3);
CALL(arm_fld, -1, 0);
#else
v1 = x87_do_push(dyn, ninst, x2, NEON_CACHE_ST_D);
X87_PUSH_OR_FAIL(v1, dyn, ninst, x2, NEON_CACHE_ST_D);
// copy 10bytes of *ED to STld(0)
LDR_IMM9(x3, xEmu, offsetof(x86emu_t, top));
int a = -dyn->n.x87stack;

@@ -407,7 +407,7 @@ uintptr_t dynarecDB(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
MARK2;
#endif
}
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
default:
DEFAULT;
@@ -115,7 +115,7 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
VCMP_F64(v1, v2);
}
FCOM(x1, x2);
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
case 0xE0:
case 0xE1:

@@ -276,7 +276,7 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
}
VCMP_F64(v1, d1);
FCOM(x1, x2);
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
case 4:
INST_NAME("FSUB ST0, double[ED]");
@@ -49,7 +49,7 @@ uintptr_t dynarecDD(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
#if 1
if((nextop&7)==0 && PK(0)==0xD9 && PK(1)==0xF7) {
MESSAGE(LOG_DUMP, "Hack for FFREE ST0 / FINCSTP\n");
x87_do_pop(dyn, ninst, x1);
X87_POP_OR_FAIL(dyn, ninst, x1);
addr+=2;
SKIPTEST(x1);
} else

@@ -80,7 +80,7 @@ uintptr_t dynarecDD(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
break;
case 0xD8:
INST_NAME("FSTP ST0, ST0");
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
case 0xD9:
case 0xDA:

@@ -92,7 +92,7 @@ uintptr_t dynarecDD(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
INST_NAME("FSTP ST0, STx");
// copy the cache value for st0 to stx
x87_swapreg(dyn, ninst, x1, x2, 0, nextop&7);
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;

case 0xE0:

@@ -130,7 +130,7 @@ uintptr_t dynarecDD(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
VCMP_F64(v1, v2);
}
FCOM(x1, x2);
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;

case 0xC8:

@@ -164,7 +164,7 @@ uintptr_t dynarecDD(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
switch((nextop>>3)&7) {
case 0:
INST_NAME("FLD double");
v1 = x87_do_push(dyn, ninst, x1, NEON_CACHE_ST_D);
X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, NEON_CACHE_ST_D);
parity = getedparity(dyn, ninst, addr, nextop, 3);
if (parity) {
addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 1023, 3, 0, NULL);

@@ -182,7 +182,7 @@ uintptr_t dynarecDD(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, 0, NULL);
if(ed!=x1) {MOV_REG(x1, ed);}
CALL(arm_fistt64, -1, 0);
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
case 2:
INST_NAME("FST double");

@@ -211,7 +211,7 @@ uintptr_t dynarecDD(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
STR_IMM9(x2, ed, fixedaddress);
STR_IMM9(x3, ed, fixedaddress+4);
}
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
case 4:
INST_NAME("FRSTOR m108byte");
@@ -54,7 +54,7 @@ uintptr_t dynarecDE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
}
if(!box86_dynarec_fastround)
x87_restoreround(dyn, ninst, u8);
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
case 0xC8:
case 0xC9:

@@ -76,7 +76,7 @@ uintptr_t dynarecDE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
}
if(!box86_dynarec_fastround)
x87_restoreround(dyn, ninst, u8);
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
case 0xD0:
case 0xD1:

@@ -95,7 +95,7 @@ uintptr_t dynarecDE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
VCMP_F64(v1, v2);
}
FCOM(x1, x2);
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;

case 0xD9:

@@ -108,8 +108,8 @@ uintptr_t dynarecDE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
VCMP_F64(v1, v2);
}
FCOM(x1, x2);
x87_do_pop(dyn, ninst, x3);
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
case 0xE0:
case 0xE1:

@@ -131,7 +131,7 @@ uintptr_t dynarecDE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
}
if(!box86_dynarec_fastround)
x87_restoreround(dyn, ninst, u8);
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
case 0xE8:
case 0xE9:

@@ -153,7 +153,7 @@ uintptr_t dynarecDE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
}
if(!box86_dynarec_fastround)
x87_restoreround(dyn, ninst, u8);
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
case 0xF0:
case 0xF1:

@@ -175,7 +175,7 @@ uintptr_t dynarecDE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
}
if(!box86_dynarec_fastround)
x87_restoreround(dyn, ninst, u8);
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
case 0xF8:
case 0xF9:

@@ -220,7 +220,7 @@ uintptr_t dynarecDE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
}
if(!box86_dynarec_fastround || !box86_dynarec_fastnan)
VMSR(x14); // restore fpscr
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;

case 0xD8:
@@ -53,7 +53,7 @@ uintptr_t dynarecDF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
case 0xC7:
INST_NAME("FFREEP STx");
// not handling Tag...
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;

case 0xE0:

@@ -91,7 +91,7 @@ uintptr_t dynarecDF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
VCMP_F64(v1, v2);
}
FCOMI(x1, x2);
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
case 0xF0:
case 0xF1:

@@ -112,7 +112,7 @@ uintptr_t dynarecDF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
VCMP_F64(v1, v2);
}
FCOMI(x1, x2);
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;

case 0xC8:

@@ -161,7 +161,7 @@ uintptr_t dynarecDF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
switch((nextop>>3)&7) {
case 0:
INST_NAME("FILD ST0, Ew");
v1 = x87_do_push(dyn, ninst, x1, box86_dynarec_x87double?NEON_CACHE_ST_D:NEON_CACHE_ST_F);
X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, box86_dynarec_x87double?NEON_CACHE_ST_D:NEON_CACHE_ST_F);
addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 255, 0, 0, NULL);
LDRSH_IMM8(x1, wback, fixedaddress);
if(ST_IS_F(0)) {

@@ -197,7 +197,7 @@ uintptr_t dynarecDF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
CMPS_REG_LSL_IMM5_COND(cEQ, ed, x3, 0);
MOVW_COND(cNE, x3, 0x8000); // saturated
STRH_IMM8(x3, wback, fixedaddress);
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
VMSR(x14);
break;
case 2:

@@ -242,20 +242,20 @@ uintptr_t dynarecDF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
CMPS_REG_LSL_IMM5_COND(cEQ, ed, x3, 0);
MOVW_COND(cNE, x3, 0x8000); // saturated
STRH_IMM8(x3, wback, fixedaddress);
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
x87_restoreround(dyn, ninst, u8);
break;
case 4:
INST_NAME("FBLD ST0, tbytes");
MESSAGE(LOG_DUMP, "Need Optimization\n");
x87_do_push_empty(dyn, ninst, x1);
X87_PUSH_EMPTY_OR_FAIL(dyn, ninst, x1);
addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, 0, NULL);
if(ed!=x1) {MOV_REG(x1, ed);}
CALL(fpu_fbld, -1, 0);
break;
case 5:
INST_NAME("FILD ST0, i64");
v1 = x87_do_push(dyn, ninst, x1, NEON_CACHE_ST_D);
X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, NEON_CACHE_ST_D);
v2 = fpu_get_scratch_double(dyn);
s0 = fpu_get_scratch_single(dyn);
parity = getedparity(dyn, ninst, addr, nextop, 3);

@@ -315,7 +315,7 @@ uintptr_t dynarecDF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, 0, NULL);
if(ed!=x1) {MOV_REG(x1, ed);}
CALL(fpu_fbst, -1, 0);
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
case 7: // could be inlined for most thing, but is it usefull?
INST_NAME("FISTP i64, ST0");

@@ -440,7 +440,7 @@ uintptr_t dynarecDF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
CALL(arm_fistp64, -1, 0);
#endif
}
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
default:
DEFAULT;
@@ -487,13 +487,18 @@ void fpu_free_reg_quad(dynarec_arm_t* dyn, int reg)
dyn->n.neoncache[i+1].v = 0;
}
// Reset fpu regs counter
static void fpu_reset_reg_neoncache(neoncache_t* n)
{
n->fpu_reg = 0;
for (int i=0; i<24; ++i) {
n->fpuused[i]=0;
n->neoncache[i].v = 0;
}

}
void fpu_reset_reg(dynarec_arm_t* dyn)
{
dyn->n.fpu_reg = 0;
for (int i=0; i<24; ++i) {
dyn->n.fpuused[i]=0;
dyn->n.neoncache[i].v = 0;
}
fpu_reset_reg_neoncache(&dyn->n);
}

int neoncache_get_st(dynarec_arm_t* dyn, int ninst, int a)
@@ -1042,6 +1047,54 @@ void print_opcode(dynarec_arm_t* dyn, int ninst, uint32_t opcode)
dynarec_log(LOG_NONE, "\t%08x\t%s\n", opcode, arm_print(opcode));
}

static void x87_reset(neoncache_t* n)
{
for (int i=0; i<8; ++i)
n->x87cache[i] = -1;
n->x87stack = 0;
n->stack = 0;
n->stack_next = 0;
n->stack_pop = 0;
n->stack_push = 0;
n->combined1 = n->combined2 = 0;
n->swapped = 0;
n->barrier = 0;
n->pushed = 0;
n->poped = 0;

for(int i=0; i<24; ++i)
if(n->neoncache[i].t == NEON_CACHE_ST_F || n->neoncache[i].t == NEON_CACHE_ST_D)
n->neoncache[i].v = 0;
}
static void mmx_reset(neoncache_t* n)
{
n->mmxcount = 0;
for (int i=0; i<8; ++i)
n->mmxcache[i] = -1;
}
static void sse_reset(neoncache_t* n)
{
for (int i=0; i<8; ++i)
n->ssecache[i].v = -1;
}

void fpu_reset(dynarec_arm_t* dyn)
{
x87_reset(&dyn->n);
mmx_reset(&dyn->n);
sse_reset(&dyn->n);
fpu_reset_reg(dyn);
}

void fpu_reset_ninst(dynarec_arm_t* dyn, int ninst)
{
x87_reset(&dyn->insts[ninst].n);
mmx_reset(&dyn->insts[ninst].n);
sse_reset(&dyn->insts[ninst].n);
fpu_reset_reg_neoncache(&dyn->insts[ninst].n);
}

int fpu_is_st_freed(dynarec_arm_t* dyn, int ninst, int st)
{
return (dyn->n.tags&(0b11<<(st*2)))?1:0;
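The reset helpers now take the neoncache_t sub-structure directly, so the same code serves both the whole-dynarec reset (fpu_reset) and the per-instruction reset used on dead code (fpu_reset_ninst). A tiny sketch of that refactor shape with stand-in types, not the real box86 ones:

#include <string.h>
#include <stdio.h>

typedef struct { int cache[8]; int stack; } toy_cache_t;
typedef struct { toy_cache_t n; } toy_dyn_t;
typedef struct { toy_cache_t n; } toy_inst_t;

/* The shared reset works on the sub-structure... */
static void cache_reset(toy_cache_t *n)
{
    for (int i = 0; i < 8; ++i)
        n->cache[i] = -1;
    n->stack = 0;
}

/* ...so both the global and the per-instruction reset can reuse it. */
static void dyn_reset(toy_dyn_t *dyn)            { cache_reset(&dyn->n); }
static void inst_reset(toy_inst_t *insts, int i) { cache_reset(&insts[i].n); }

int main(void)
{
    toy_dyn_t dyn; toy_inst_t insts[4];
    memset(&dyn, 0, sizeof(dyn)); memset(insts, 0, sizeof(insts));
    dyn_reset(&dyn);
    inst_reset(insts, 2);
    printf("%d %d\n", dyn.n.cache[0], insts[2].n.cache[0]);
    return 0;
}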
@@ -101,6 +101,10 @@ const char* getCacheName(int t, int n);
void inst_name_pass3(dynarec_arm_t* dyn, int ninst, const char* name);
void print_opcode(dynarec_arm_t* dyn, int ninst, uint32_t opcode);

// reset the cache
void fpu_reset(dynarec_arm_t* dyn);
void fpu_reset_ninst(dynarec_arm_t* dyn, int ninst);

// is st freed
int fpu_is_st_freed(dynarec_arm_t* dyn, int ninst, int st);
#endif //__DYNAREC_ARM_FUNCTIONS_H__
@@ -295,11 +295,10 @@ void ret_to_epilog(dynarec_arm_t* dyn, int ninst)
// pop the actual return address for ARM stack
LDM(xSP, (1<<x2)|(1<<x3));
CMPS_REG_LSL_IMM5(x3, xEIP, 0); // is it the right address?
BLcond(cEQ, x2);
BXcond(cEQ, x2);
// not the correct return address, regular jump, but purge the stack first, it's unsync now...
CMPS_IMM8(x2, 0); // that was already the top of the stack...
LDR_IMM9_COND(cNE, xSP, xEmu, offsetof(x86emu_t, xSPSave));
SUB_IMM8(xSP, xSP, 16);
LDR_IMM9_COND(cNE, xSP, xEmu, offsetof(x86emu_t, xSPSave)); // load pointer only if not already on top
}
MOV32(x2, getJumpTable());
MOV_REG_LSR_IMM5(x3, xEIP, JMPTABL_SHIFT);

@@ -328,11 +327,10 @@ void retn_to_epilog(dynarec_arm_t* dyn, int ninst, int n)
// pop the actual return address for ARM stack
LDM(xSP, (1<<x2)|(1<<x3));
CMPS_REG_LSL_IMM5(x3, xEIP, 0); // is it the right address?
BLcond(cEQ, x2);
BXcond(cEQ, x2);
// not the correct return address, regular jump, but purge the stack first, it's unsync now...
CMPS_IMM8(x2, 0); // that was already the top of the stack...
LDR_IMM9_COND(cNE, xSP, xEmu, offsetof(x86emu_t, xSPSave));
SUB_IMM8(xSP, xSP, 16);
}
MOV32(x2, getJumpTable());
MOV_REG_LSR_IMM5(x3, xEIP, JMPTABL_SHIFT);
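These two hunks carry the CALLRET fix named in the commit title: when the return address popped from the ARM-side stack matches the x86 target, the code now branches to it with BXcond instead of BLcond, i.e. a plain branch rather than a branch-with-link. A small C model of the underlying return-address prediction, with invented names, for readers who do not read ARM assembly:

#include <stdint.h>
#include <stdio.h>

/* Toy return-address prediction stack: each CALL pushes both the x86 return
 * address and the native code that continues after the call; RET pops the
 * pair and only takes the fast path when the prediction still matches. */
typedef struct { uint32_t x86_ret; void (*native_ret)(void); } pred_t;

static pred_t pred_stack[16];
static int pred_top = 0;

static void on_call(uint32_t x86_ret, void (*native_ret)(void))
{
    pred_stack[pred_top++] = (pred_t){ x86_ret, native_ret };
}

static void fallback(uint32_t eip) { printf("slow path via jump table to %08x\n", eip); }
static void after_call(void)       { printf("fast path: resumed after call\n"); }

static void on_ret(uint32_t eip_from_x86_stack)
{
    pred_t p = pred_stack[--pred_top];
    if (p.x86_ret == eip_from_x86_stack)
        p.native_ret();               /* like BXcond(cEQ, x2): plain branch, no link */
    else
        fallback(eip_from_x86_stack); /* mismatch: resync and go through the jump table */
}

int main(void)
{
    on_call(0x08048123u, after_call);
    on_ret(0x08048123u);              /* prediction hit */
    return 0;
}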
@@ -603,26 +601,6 @@ void grab_fsdata(dynarec_arm_t* dyn, uintptr_t addr, int ninst, int reg)
}

// x87 stuffs
static void x87_reset(dynarec_arm_t* dyn)
{
for (int i=0; i<8; ++i)
dyn->n.x87cache[i] = -1;
dyn->n.x87stack = 0;
dyn->n.stack = 0;
dyn->n.stack_next = 0;
dyn->n.stack_pop = 0;
dyn->n.stack_push = 0;
dyn->n.combined1 = dyn->n.combined2 = 0;
dyn->n.swapped = 0;
dyn->n.barrier = 0;
dyn->n.pushed = 0;
dyn->n.poped = 0;

for(int i=0; i<24; ++i)
if(dyn->n.neoncache[i].t == NEON_CACHE_ST_F || dyn->n.neoncache[i].t == NEON_CACHE_ST_D)
dyn->n.neoncache[i].v = 0;
}

void x87_stackcount(dynarec_arm_t* dyn, int ninst, int scratch)
{
if(!dyn->n.x87stack)

@@ -1383,12 +1361,6 @@ void x87_restoreround(dynarec_arm_t* dyn, int ninst, int s1)
}

// MMX helpers
static void mmx_reset(dynarec_arm_t* dyn)
{
dyn->n.mmxcount = 0;
for (int i=0; i<8; ++i)
dyn->n.mmxcache[i] = -1;
}
static int isx87Empty(dynarec_arm_t* dyn)
{
for (int i=0; i<8; ++i)

@@ -1470,11 +1442,6 @@ static void mmx_reflectcache(dynarec_arm_t* dyn, int ninst, int s1)

// SSE / SSE2 helpers
static void sse_reset(dynarec_arm_t* dyn)
{
for (int i=0; i<8; ++i)
dyn->n.ssecache[i].v = -1;
}
// get neon register for a SSE reg, create the entry if needed
int sse_get_reg(dynarec_arm_t* dyn, int ninst, int s1, int a, int forwrite)
{

@@ -2099,14 +2066,6 @@ void fpu_unreflectcache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3)
x87_unreflectcache(dyn, ninst, s1, s2, s3);
}

void fpu_reset(dynarec_arm_t* dyn)
{
x87_reset(dyn);
mmx_reset(dyn);
sse_reset(dyn);
fpu_reset_reg(dyn);
}

// get the single reg that from the double "reg" (so Dx[idx])
int fpu_get_single_reg(dynarec_arm_t* dyn, int ninst, int reg, int idx)
{
@@ -343,6 +343,37 @@
} \

#if STEP == 0
#define X87_PUSH_OR_FAIL(var, dyn, ninst, scratch, t) var = x87_do_push(dyn, ninst, scratch, t)
#define X87_PUSH_EMPTY_OR_FAIL(dyn, ninst, scratch) x87_do_push_empty(dyn, ninst, scratch)
#define X87_POP_OR_FAIL(dyn, ninst, scratch) x87_do_pop(dyn, ninst, scratch)
#else
#define X87_PUSH_OR_FAIL(var, dyn, ninst, scratch, t) \
if ((dyn->n.x87stack==8) || (dyn->n.pushed==8)) { \
if(box86_dynarec_dump) dynarec_log(LOG_NONE, " Warning, suspicious x87 Push, stack=%d/%d on inst %d\n", dyn->n.x87stack, dyn->n.pushed, ninst); \
dyn->abort = 1; \
return addr; \
} \
var = x87_do_push(dyn, ninst, scratch, t)

#define X87_PUSH_EMPTY_OR_FAIL(dyn, ninst, scratch) \
if ((dyn->n.x87stack==8) || (dyn->n.pushed==8)) { \
if(box86_dynarec_dump) dynarec_log(LOG_NONE, " Warning, suspicious x87 Push, stack=%d/%d on inst %d\n", dyn->n.x87stack, dyn->n.pushed, ninst); \
dyn->abort = 1; \
return addr; \
} \
x87_do_push_empty(dyn, ninst, scratch)

#define X87_POP_OR_FAIL(dyn, ninst, scratch) \
if ((dyn->n.x87stack==-8) || (dyn->n.poped==8)) { \
if(box86_dynarec_dump) dynarec_log(LOG_NONE, " Warning, suspicious x87 Pop, stack=%d/%d on inst %d\n", dyn->n.x87stack, dyn->n.poped, ninst); \
dyn->abort = 1; \
return addr; \
} \
x87_do_pop(dyn, ninst, scratch)
#endif

#define SET_DFNONE(S) if(!dyn->f.dfnone) {MOVW(S, d_none); STR_IMM9(S, xEmu, offsetof(x86emu_t, df)); dyn->f.dfnone=1;}
#define SET_DF(S, N) \
if(N) { \
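These macros explain the mechanical x87_do_push/x87_do_pop replacements throughout the opcode files above: on every pass except pass 0 they refuse to overflow or underflow the 8-deep x87 stack, set dyn->abort, and return from the opcode handler so FillBlock can cancel the block. A simplified, self-contained illustration of that expansion (hypothetical handler and stand-in types, not the real box86 structures):

#include <stdio.h>
#include <stdint.h>

typedef struct { int x87stack, pushed; } toy_n_t;
typedef struct { toy_n_t n; int abort; } toy_dyn_t;

static int toy_do_push(toy_dyn_t *dyn) { return dyn->n.pushed++; }

/* Same shape as X87_PUSH_OR_FAIL on STEP != 0: refuse to overflow the
 * 8-deep x87 stack, flag the block as aborted, and leave the handler. */
#define TOY_PUSH_OR_FAIL(var, dyn, addr)                  \
    if ((dyn)->n.x87stack == 8 || (dyn)->n.pushed == 8) { \
        (dyn)->abort = 1;                                 \
        return addr;                                      \
    }                                                     \
    var = toy_do_push(dyn)

static uintptr_t handle_fld(toy_dyn_t *dyn, uintptr_t addr)
{
    int v1;
    TOY_PUSH_OR_FAIL(v1, dyn, addr);   /* aborts the block instead of corrupting state */
    (void)v1;
    return addr + 2;                    /* pretend the opcode was 2 bytes */
}

int main(void)
{
    toy_dyn_t dyn = { { 0, 8 }, 0 };    /* stack already "full" */
    uintptr_t addr = handle_fld(&dyn, 0x1000);
    printf("abort=%d, addr=%#lx\n", dyn.abort, (unsigned long)addr);
    return 0;
}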
@@ -590,7 +621,6 @@ void* arm_next(x86emu_t* emu, uintptr_t addr);

#define fpu_pushcache STEPNAME(fpu_pushcache)
#define fpu_popcache STEPNAME(fpu_popcache)
#define fpu_reset STEPNAME(fpu_reset)
#define fpu_reset_cache STEPNAME(fpu_reset_cache)
#define fpu_propagate_stack STEPNAME(fpu_propagate_stack)
#define fpu_purgecache STEPNAME(fpu_purgecache)

@@ -805,8 +835,6 @@ void sse_forget_reg(dynarec_arm_t* dyn, int ninst, int a, int s1);
int sse_reflect_reg(dynarec_arm_t* dyn, int ninst, int a, int s1);

// common coproc helpers
// reset the cache
void fpu_reset(dynarec_arm_t* dyn);
// reset the cache with n
void fpu_reset_cache(dynarec_arm_t* dyn, int ninst, int reset_n);
// propagate stack state
@@ -49,6 +49,9 @@ uintptr_t arm_pass(dynarec_arm_t* dyn, uintptr_t addr)
dyn->forward_to = 0;
dyn->forward_size = 0;
dyn->forward_ninst = 0;
#if STEP == 0
memset(&dyn->insts[ninst], 0, sizeof(instruction_arm_t));
#endif
fpu_reset(dyn);
int reset_n = -1;
int stopblock = 2+(FindElfAddress(my_context, addr)?0:1); // if block is in elf_memory, it can be extended with bligblocks==2, else it needs 3 // ok, go now

@@ -156,8 +159,8 @@ uintptr_t arm_pass(dynarec_arm_t* dyn, uintptr_t addr)
ok = 1;
// we use the 1st predecessor here
int ii = ninst+1;
if(ii<dyn->size && !dyn->insts[ii].pred_sz) {
while(ii<dyn->size && (!dyn->insts[ii].pred_sz || (dyn->insts[ii].pred_sz==1 && dyn->insts[ii].pred[0]==ii-1))) {
if(ii<dyn->size && !dyn->insts[ii].x86.alive) {
while(ii<dyn->size && !dyn->insts[ii].x86.alive) {
// may need to skip opcodes to advance
++ninst;
NEW_INST;

@@ -189,7 +192,7 @@ uintptr_t arm_pass(dynarec_arm_t* dyn, uintptr_t addr)
if(dyn->forward_to == addr && !need_epilog && ok>=0) {
// we made it!
reset_n = get_first_jump(dyn, addr);
if(box86_dynarec_dump) dynarec_log(LOG_NONE, "Forward extend block for %d bytes %s%p -> %p\n", dyn->forward_to-dyn->forward, dyn->insts[dyn->forward_ninst].x86.has_callret?"(opt. call) ":"", (void*)dyn->forward, (void*)dyn->forward_to);
if(box86_dynarec_dump) dynarec_log(LOG_NONE, "Forward extend block for %d bytes %s%p -> %p (ninst %d - %d)\n", dyn->forward_to-dyn->forward, dyn->insts[dyn->forward_ninst].x86.has_callret?"(opt. call) ":"", (void*)dyn->forward, (void*)dyn->forward_to, reset_n, ninst);
if(dyn->insts[dyn->forward_ninst].x86.has_callret && !dyn->insts[dyn->forward_ninst].x86.has_next)
dyn->insts[dyn->forward_ninst].x86.has_next = 1; // this block actually continue
dyn->forward = 0;

@@ -197,7 +200,7 @@ uintptr_t arm_pass(dynarec_arm_t* dyn, uintptr_t addr)
dyn->forward_size = 0;
dyn->forward_ninst = 0;
ok = 1; // in case it was 0
} else if ((dyn->forward_to < addr) || !ok) {
} else if ((dyn->forward_to < addr) || ok<=0) {
// something when wrong! rollback
if(box86_dynarec_dump) dynarec_log(LOG_NONE, "Could not forward extend block for %d bytes %p -> %p\n", dyn->forward_to-dyn->forward, (void*)dyn->forward, (void*)dyn->forward_to);
ok = 0;

@@ -272,6 +275,7 @@ uintptr_t arm_pass(dynarec_arm_t* dyn, uintptr_t addr)
reset_n = -2;
++ninst;
#if STEP == 0
memset(&dyn->insts[ninst], 0, sizeof(instruction_arm_t));
if(ok && (((box86_dynarec_bigblock<stopblock) && !isJumpTableDefault((void*)addr))
|| (addr>=box86_nodynarec_start && addr<box86_nodynarec_end)))
#else
@@ -22,11 +22,6 @@
#define SET_HASCALLRET() dyn->insts[ninst].x86.has_callret = 1
#define NEW_INST \
++dyn->size; \
if(dyn->size+3>=dyn->cap) { \
dyn->insts = (instruction_arm_t*)dynaRealloc(dyn->insts, sizeof(instruction_arm_t)*dyn->cap*2);\
memset(&dyn->insts[dyn->cap], 0, sizeof(instruction_arm_t)*dyn->cap); \
dyn->cap *= 2; \
} \
dyn->insts[ninst].x86.addr = ip; \
dyn->n.combined1 = dyn->n.combined2 = 0;\
dyn->n.swapped = 0; dyn->n.barrier = 0; \
@@ -114,6 +114,7 @@ typedef struct dynarec_arm_s {
int32_t forward_size; // size at the forward point
int forward_ninst; // ninst at the forward point
uint8_t always_test;
uint8_t abort; // abort the creation of the block
} dynarec_arm_t;

void add_next(dynarec_arm_t *dyn, uintptr_t addr);
@@ -37,6 +37,7 @@ typedef struct instruction_x86_s {
uint8_t jmp_cond:1; // 1 of conditionnal jump
uint8_t has_next:1; // does this opcode can continue to the next?
uint8_t has_callret:1; // this instruction have an optimised call setup
uint8_t alive:1; // this opcode gets executed (0 if dead code in that block)
uint8_t barrier; // next instruction is a jump point, so no optim allowed
uint8_t barrier_next; // next instruction needs a barrier
uint8_t state_flags;// One of SF_XXX state
@@ -62,9 +62,9 @@ typedef struct x86emu_s {
mmx87_regs_t mmx[8];
uint32_t top; // top is part of sw, but it's faster to have it separatly
int fpu_stack;
uint32_t fpu_tags; // tags for the x87 regs, stacked, only on a 16bits anyway
fpu_ld_t fpu_ld[8]; // for long double emulation / 80bits fld fst
fpu_ll_t fpu_ll[8]; // for 64bits fild / fist sequence
uint32_t fpu_tags; // tags for the x87 regs, stacked, only on a 16bits anyway
// sse
sse_regs_t xmm[8];
mmxcontrol_t mxcsr;
@@ -5,6 +5,8 @@ typedef struct dynablock_s dynablock_t;
typedef struct x86emu_s x86emu_t;
typedef struct instsize_s instsize_t;

#define MAX_INSTS 32760

void addInst(instsize_t* insts, size_t* size, int x86_size, int arm_size);

void CancelBlock();