[DYNAREC] Fixed CALLRET and backported many BIBLOCK improvments from box64

This commit is contained in:
ptitSeb 2024-05-19 17:22:58 +02:00
parent 0829c14a73
commit b145080d37
22 changed files with 242 additions and 148 deletions

View File

@ -430,6 +430,8 @@ Op is 20-27
// BLX (register): branch with link and exchange; 0xe prefix = AL condition (always executed)
#define BLX(reg) EMIT(0xe12fff30 | (reg) )
// BLX (register) with a caller-supplied condition code C OR'ed into the encoding
#define BLXcond(C, reg) EMIT(C | 0x012fff30 | (reg) )
// B<cond> with byte offset O; word-aligned, so (O>>2) goes into the 24-bit signed imm field
#define Bcond(C, O) EMIT(C | (0b101<<25) | (0<<24) | (((O)>>2)&0xffffff))

View File

@ -12,7 +12,7 @@ arm_epilog:
stm r0, {r4-r12,r14} // put back reg value in emu, including EIP (so r14 must be Flags now)
// restore stack pointer
ldr sp, [r0, #(8*4+2*4)]
ldr r5, [sp, #-4]
pop {r4, r5}
str r5, [r0, #(8*4+2*4)] // put back old value
//restore all used register
vpop {d8-d15}
@ -25,7 +25,7 @@ arm_epilog:
arm_epilog_fast:
// restore stack pointer
ldr sp, [r0, #(8*4+2*4)]
ldr r5, [sp, #-4]
pop {r4, r5}
str r5, [r0, #(8*4+2*4)] // put back old value
//restore all used register
vpop {d8-d15}

View File

@ -13,9 +13,9 @@ arm_prolog:
vpush {d8-d15}
// save Sp and set up stack for optional callret
ldr r5, [r0, #(8*4+2*4)] // grab old value of xSPSave
str sp, [r0, #(8*4+2*4)]
mov r4, #0
push {r4-r5}
str sp, [r0, #(8*4+2*4)]
//setup emu -> register
ldm r0, {r4-r12} // all 8 register in direct access, plus flags, no EIP (so r14 can be used as scratch)
//jump to function

View File

@ -73,8 +73,7 @@ void add_next(dynarec_arm_t *dyn, uintptr_t addr) {
}
// add slots
if(dyn->next_sz == dyn->next_cap) {
dyn->next_cap += 64;
dyn->next = (uintptr_t*)dynaRealloc(dyn->next, dyn->next_cap*sizeof(uintptr_t));
printf_log(LOG_NONE, "Warning, overallocating next\n");
}
dyn->next[dyn->next_sz++] = addr;
}
@ -98,8 +97,7 @@ uintptr_t get_closest_next(dynarec_arm_t *dyn, uintptr_t addr) {
// Record instruction index ninst as a jump site in dyn->jmps.
// NOTE(review): unified-diff view — the old growth path (realloc) and the new
// overallocation warning appear back to back below; post-commit the capacity
// is fixed (static_jmps / MAX_INSTS), so hitting the cap only logs a warning.
void add_jump(dynarec_arm_t *dyn, int ninst) {
// add slots
if(dyn->jmp_sz == dyn->jmp_cap) {
dyn->jmp_cap += 64;
dyn->jmps = (int*)dynaRealloc(dyn->jmps, dyn->jmp_cap*sizeof(int));
printf_log(LOG_NONE, "Warning, overallocating jmps\n");
}
dyn->jmps[dyn->jmp_sz++] = ninst;
}
@ -282,33 +280,47 @@ void addInst(instsize_t* insts, size_t* size, int x86_size, int native_size)
}
}
// Mark instruction i and everything reachable from it as alive, following
// both the jump edge (jmp_insts, when resolved inside the block) and the
// fall-through edge (has_next).
// The fall-through chain is walked iteratively so recursion depth is bounded
// by the number of in-block jump edges, not by the block size — the original
// recursed on i+1 too, risking native stack exhaustion on very large blocks.
static void recurse_mark_alive(dynarec_arm_t* dyn, int i)
{
    while(!dyn->insts[i].x86.alive) {
        dyn->insts[i].x86.alive = 1;
        // jump target: genuinely branching control flow, keep the recursion
        if(dyn->insts[i].x86.jmp && dyn->insts[i].x86.jmp_insts!=-1)
            recurse_mark_alive(dyn, dyn->insts[i].x86.jmp_insts);
        // fall-through: iterate instead of tail-recursing
        if(i<dyn->size-1 && dyn->insts[i].x86.has_next)
            ++i;
        else
            return;
    }
}
static void fillPredecessors(dynarec_arm_t* dyn)
// Count how many predecessor slots the whole block needs (one per in-block
// jump edge plus one per fall-through edge between alive instructions), so
// the caller can allocate dyn->predecessor in a single chunk; as a side
// effect, marks reachable instructions alive via recurse_mark_alive and
// bumps each target's per-instruction pred_sz counter.
// Returns the total slot count (seeded at 1 "to be safe").
// NOTE(review): unified-diff view — old and new variants of several lines
// appear back to back below (e.g. the ++pred_sz / pred_sz++ pairs and the
// two orphan-branch cleanup loops); only one of each pair exists post-commit.
static int sizePredecessors(dynarec_arm_t* dyn)
{
int pred_sz = 1; // to be safe
// compute total size of predecessor to alocate the array
// compute total size of predecessor to allocate the array
// mark alive...
recurse_mark_alive(dyn, 0);
// first compute the jumps
int jmpto;
for(int i=0; i<dyn->size; ++i) {
if(dyn->insts[i].x86.jmp && dyn->insts[i].x86.jmp_insts!=-1) {
++pred_sz;
++dyn->insts[dyn->insts[i].x86.jmp_insts].pred_sz;
if(dyn->insts[i].x86.alive && dyn->insts[i].x86.jmp && ((jmpto=dyn->insts[i].x86.jmp_insts)!=-1)) {
pred_sz++;
dyn->insts[jmpto].pred_sz++;
}
}
// remove "has_next" from orphean branch
// remove "has_next" from orphan branch
for(int i=0; i<dyn->size-1; ++i) {
if(!dyn->insts[i].x86.has_next) {
if(dyn->insts[i+1].x86.has_next && !dyn->insts[i+1].pred_sz)
dyn->insts[i+1].x86.has_next = 0;
}
if(dyn->insts[i].x86.has_next && !dyn->insts[i+1].x86.alive)
dyn->insts[i].x86.has_next = 0;
}
// second the "has_next"
for(int i=0; i<dyn->size-1; ++i) {
if(dyn->insts[i].x86.has_next) {
++pred_sz;
++dyn->insts[i+1].pred_sz;
pred_sz++;
dyn->insts[i+1].pred_sz++;
}
}
// NOTE(review): old allocation path — post-commit the caller allocates the
// array itself (alloca at the FillBlock call site) from the returned size.
dyn->predecessor = (int*)dynaMalloc(pred_sz*sizeof(int));
return pred_sz;
}
static void fillPredecessors(dynarec_arm_t* dyn)
{
// fill pred pointer
int* p = dyn->predecessor;
for(int i=0; i<dyn->size; ++i) {
@ -317,7 +329,7 @@ static void fillPredecessors(dynarec_arm_t* dyn)
dyn->insts[i].pred_sz=0; // reset size, it's reused to actually fill pred[]
}
// fill pred
for(int i=0; i<dyn->size; ++i) {
for(int i=0; i<dyn->size; ++i) if(dyn->insts[i].x86.alive) {
if((i!=dyn->size-1) && dyn->insts[i].x86.has_next)
dyn->insts[i+1].pred[dyn->insts[i+1].pred_sz++] = i;
if(dyn->insts[i].x86.jmp && (dyn->insts[i].x86.jmp_insts!=-1)) {
@ -371,22 +383,23 @@ static int updateNeed(dynarec_arm_t* dyn, int ninst, uint8_t need) {
}
void* current_helper = NULL;
static int static_jmps[MAX_INSTS+2];
static uintptr_t static_next[MAX_INSTS+2];
static instruction_arm_t static_insts[MAX_INSTS+2] = {0};
// TODO: ninst could be a uint16_t instead of an int, that could save some temp. memory
// Abort the dynarec block currently being built: release the helper's working
// buffers and the half-built native code mapping, optionally holding the
// dyndump mutex around the whole operation.
// NOTE(review): unified-diff view — both the old and new positions of the
// "current_helper = NULL;" reset appear below; post-commit it is cleared
// early (before freeing), and the dynaFree calls are presumably the removed
// lines since the helper buffers became static arrays in this commit — verify.
void CancelBlock(int need_lock)
{
if(need_lock)
mutex_lock(&my_context->mutex_dyndump);
dynarec_arm_t* helper = (dynarec_arm_t*)current_helper;
current_helper = NULL;
if(helper) {
dynaFree(helper->next);
dynaFree(helper->insts);
dynaFree(helper->predecessor);
// free the partially generated native code, if any was mapped
if(helper->dynablock && helper->dynablock->actual_block) {
FreeDynarecMap((uintptr_t)helper->dynablock->actual_block);
helper->dynablock->actual_block = NULL;
}
}
current_helper = NULL;
if(need_lock)
mutex_unlock(&my_context->mutex_dyndump);
}
@ -451,10 +464,19 @@ dynarec_log(LOG_DEBUG, "Asked to Fill block %p with %p\n", block, (void*)addr);
helper.dynablock = block;
helper.start = addr;
uintptr_t start = addr;
helper.cap = 64; // needs epilog handling
helper.insts = (instruction_arm_t*)dynaCalloc(helper.cap, sizeof(instruction_arm_t));
helper.cap = MAX_INSTS;
helper.insts = static_insts;
helper.jmps = static_jmps;
helper.jmp_cap = MAX_INSTS;
helper.next = static_next;
helper.next_cap = MAX_INSTS;
// pass 0, addresses, x86 jump addresses, overall size of the block
uintptr_t end = arm_pass0(&helper, addr);
if(helper.abort) {
if(box86_dynarec_dump || box86_dynarec_log)dynarec_log(LOG_NONE, "Abort dynablock on pass0\n");
CancelBlock(0);
return NULL;
}
// basic checks
if(!helper.size) {
dynarec_log(LOG_DEBUG, "Warning, null-sized dynarec block (%p)\n", (void*)addr);
@ -519,24 +541,42 @@ dynarec_log(LOG_DEBUG, "Asked to Fill block %p with %p\n", block, (void*)addr);
}
}
// no need for next and jmps anymore
dynaFree(helper.next);
helper.next_sz = helper.next_cap = 0;
helper.next = NULL;
dynaFree(helper.jmps);
helper.jmp_sz = helper.jmp_cap = 0;
helper.jmps = NULL;
// fill predecessors with the jump address
int alloc_size = sizePredecessors(&helper);
helper.predecessor = (int*)alloca(alloc_size*sizeof(int));
fillPredecessors(&helper);
int pos = helper.size;
while (pos>=0)
pos = updateNeed(&helper, pos, 0);
// remove fpu stuff on non-executed code
for(int i=1; i<helper.size-1; ++i)
if(!helper.insts[i].pred_sz) {
int ii = i;
while(ii<helper.size && !helper.insts[ii].pred_sz)
fpu_reset_ninst(&helper, ii++);
i = ii;
}
// pass 1, float optimisations, first pass for flags
arm_pass1(&helper, addr);
if(helper.abort) {
if(box86_dynarec_dump || box86_dynarec_log)dynarec_log(LOG_NONE, "Abort dynablock on pass0\n");
CancelBlock(0);
return NULL;
}
// pass 2, instruction size
arm_pass2(&helper, addr);
if(helper.abort) {
if(box86_dynarec_dump || box86_dynarec_log)dynarec_log(LOG_NONE, "Abort dynablock on pass0\n");
CancelBlock(0);
return NULL;
}
// ok, now allocate mapped memory, with executable flag on
size_t insts_rsize = (helper.insts_size+2)*sizeof(instsize_t);
insts_rsize = (insts_rsize+7)&~7; // round the size...
@ -570,13 +610,16 @@ dynarec_log(LOG_DEBUG, "Asked to Fill block %p with %p\n", block, (void*)addr);
helper.arm_size = 0;
helper.insts_size = 0; // reset
arm_pass3(&helper, addr);
if(helper.abort) {
if(box86_dynarec_dump || box86_dynarec_log)dynarec_log(LOG_NONE, "Abort dynablock on pass0\n");
CancelBlock(0);
return NULL;
}
// keep size of instructions for signal handling
block->instsize = instsize;
// ok, free the helper now
dynaFree(helper.insts);
helper.insts = NULL;
helper.instsize = NULL;
dynaFree(helper.predecessor);
helper.predecessor = NULL;
block->size = sz;
block->isize = helper.size;
@ -612,6 +655,8 @@ dynarec_log(LOG_DEBUG, "Asked to Fill block %p with %p\n", block, (void*)addr);
CancelBlock(0);
return NULL;
}
// ok, free the helper now
helper.insts = NULL;
if(insts_rsize/sizeof(instsize_t)<helper.insts_size) {
printf_log(LOG_NONE, "BOX86: Warning, ists_size difference in block between pass2 (%zu) and pass3 (%zu), allocated: %zu\n", oldinstsize, helper.insts_size, insts_rsize/sizeof(instsize_t));
}

View File

@ -119,7 +119,7 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
VCMP_F64(v1, v2);
}
FCOM(x1, x2);
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
case 0xE0:
case 0xE1:
@ -297,7 +297,7 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
VCMP_F64(v1, d1);
}
FCOM(x1, x2);
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
case 4:
INST_NAME("FSUB ST0, float[ED]");

View File

@ -67,7 +67,7 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
case 0xC6:
case 0xC7:
INST_NAME("FLD STx");
v2 = x87_do_push(dyn, ninst, x3, X87_ST(nextop&7));
X87_PUSH_OR_FAIL(v2, dyn, ninst, x3, X87_ST(nextop&7));
v1 = x87_get_st(dyn, ninst, x1, x2, (nextop&7)+1, X87_COMBINE(0, (nextop&7)+1));
if(ST_IS_F(0)) {
VMOV_32(v2, v1);
@ -198,7 +198,7 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
case 0xE8:
INST_NAME("FLD1");
v1 = x87_do_push(dyn, ninst, x1, box86_dynarec_x87double?NEON_CACHE_ST_D:NEON_CACHE_ST_F);
X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, box86_dynarec_x87double?NEON_CACHE_ST_D:NEON_CACHE_ST_F);
if(ST_IS_F(0)) {
VMOV_i_32(v1, 0b01110000);
} else {
@ -208,7 +208,7 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
break;
case 0xE9:
INST_NAME("FLDL2T");
v1 = x87_do_push(dyn, ninst, x1, box86_dynarec_x87double?NEON_CACHE_ST_D:NEON_CACHE_ST_F);
X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, box86_dynarec_x87double?NEON_CACHE_ST_D:NEON_CACHE_ST_F);
if(ST_IS_F(0)) {
MOV32(x2, (&f_l2t));
VLDR_32(v1, x2, 0);
@ -220,7 +220,7 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
break;
case 0xEA:
INST_NAME("FLDL2E");
v1 = x87_do_push(dyn, ninst, x1, box86_dynarec_x87double?NEON_CACHE_ST_D:NEON_CACHE_ST_F);
X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, box86_dynarec_x87double?NEON_CACHE_ST_D:NEON_CACHE_ST_F);
if(ST_IS_F(0)) {
MOV32(x2, (&f_l2e));
VLDR_32(v1, x2, 0);
@ -232,7 +232,7 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
break;
case 0xEB:
INST_NAME("FLDPI");
v1 = x87_do_push(dyn, ninst, x1, box86_dynarec_x87double?NEON_CACHE_ST_D:NEON_CACHE_ST_F);
X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, box86_dynarec_x87double?NEON_CACHE_ST_D:NEON_CACHE_ST_F);
if(ST_IS_F(0)) {
MOV32(x2, (&f_pi));
VLDR_32(v1, x2, 0);
@ -244,7 +244,7 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
break;
case 0xEC:
INST_NAME("FLDLG2");
v1 = x87_do_push(dyn, ninst, x1, box86_dynarec_x87double?NEON_CACHE_ST_D:NEON_CACHE_ST_F);
X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, box86_dynarec_x87double?NEON_CACHE_ST_D:NEON_CACHE_ST_F);
if(ST_IS_F(0)) {
MOV32(x2, (&f_lg2));
VLDR_32(v1, x2, 0);
@ -256,7 +256,7 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
break;
case 0xED:
INST_NAME("FLDLN2");
v1 = x87_do_push(dyn, ninst, x1, box86_dynarec_x87double?NEON_CACHE_ST_D:NEON_CACHE_ST_F);
X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, box86_dynarec_x87double?NEON_CACHE_ST_D:NEON_CACHE_ST_F);
if(ST_IS_F(0)) {
MOV32(x2, (&f_ln2));
VLDR_32(v1, x2, 0);
@ -268,7 +268,7 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
break;
case 0xEE:
INST_NAME("FLDZ");
v1 = x87_do_push(dyn, ninst, x1, box86_dynarec_x87double?NEON_CACHE_ST_D:NEON_CACHE_ST_F);
X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, box86_dynarec_x87double?NEON_CACHE_ST_D:NEON_CACHE_ST_F);
if(ST_IS_F(0)) {
VMOV_8(v1/2, 0); // float is *2...
} else {
@ -311,12 +311,12 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
VMOV_64(0, v1); // prepare call to log2
CALL_1D(log2, 0);
VMUL_F64(v2, v2, 0); //ST(1).d = log2(ST0.d)*ST(1).d
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
// should set C1 to 0
break;
case 0xF2:
INST_NAME("FPTAN");
v2 = x87_do_push(dyn, ninst, x1, box86_dynarec_x87double?NEON_CACHE_ST_D:NEON_CACHE_ST_F);
X87_PUSH_OR_FAIL(v2, dyn, ninst, x1, box86_dynarec_x87double?NEON_CACHE_ST_D:NEON_CACHE_ST_F);
v1 = x87_get_st(dyn, ninst, x1, x2, 1, NEON_CACHE_ST_D);
// seems that tan of glibc doesn't follow the rounding direction mode
if(!box86_dynarec_fastround)
@ -358,7 +358,7 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
VMOV_64(v2, 0); //ST(1).d = atan2(ST1.d, ST0.d);
if(!box86_dynarec_fastround)
x87_restoreround(dyn, ninst, u8);
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
// should set C1 to 0
break;
case 0xF4:
@ -473,7 +473,7 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
MOV32(x2, (&d_ln2));
VLDR_64(0, x2, 0);
VDIV_F64(v2, v2, 0);
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
// should set C1 to 0
break;
case 0xFA:
@ -492,7 +492,7 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
break;
case 0xFB:
INST_NAME("FSINCOS");
v2 = x87_do_push(dyn, ninst, x3, NEON_CACHE_ST_D);
X87_PUSH_OR_FAIL(v2, dyn, ninst, x3, NEON_CACHE_ST_D);
v1 = x87_get_st(dyn, ninst, x1, x2, 1, NEON_CACHE_ST_D);
// seems that sin and cos function of glibc don't follow the rounding mode
if(!box86_dynarec_fastround)
@ -612,7 +612,7 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
switch((nextop>>3)&7) {
case 0:
INST_NAME("FLD ST0, float[ED]");
v1 = x87_do_push(dyn, ninst, x1, box86_dynarec_x87double?NEON_CACHE_ST_D:NEON_CACHE_ST_F);
X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, box86_dynarec_x87double?NEON_CACHE_ST_D:NEON_CACHE_ST_F);
if(ST_IS_F(0))
s0 = v1;
else
@ -676,7 +676,7 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
VMOVfrV(x2, s0);
STR_IMM9(x2, ed, fixedaddress);
}
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
case 4:
INST_NAME("FLDENV Ed");

View File

@ -129,8 +129,8 @@ uintptr_t dynarecDA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
VCMP_F64(v1, v2);
}
FCOM(x1, x2);
x87_do_pop(dyn, ninst, x3);
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
case 0xE4:
@ -197,7 +197,7 @@ uintptr_t dynarecDA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
VCVT_F64_S32(d0, s0);
VCMP_F64(v1, d0);
FCOM(x1, x2);
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
case 4:
INST_NAME("FISUB ST0, Ed");

View File

@ -183,7 +183,7 @@ uintptr_t dynarecDB(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
switch((nextop>>3)&7) {
case 0:
INST_NAME("FILD ST0, Ed");
v1 = x87_do_push(dyn, ninst, x1, NEON_CACHE_ST_D);
X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, NEON_CACHE_ST_D);
s0 = fpu_get_scratch_single(dyn);
parity = getedparity(dyn, ninst, addr, nextop, 2);
if(parity) {
@ -218,7 +218,7 @@ uintptr_t dynarecDB(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
MOV_IMM_COND(cNE, ed, 0b10, 1); // 0x80000000
WBACK;
VMSR(x14); // put back values
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
case 2:
INST_NAME("FIST Ed, ST0");
@ -260,7 +260,7 @@ uintptr_t dynarecDB(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
TSTS_IMM8_ROR(x3, 0b00000001, 0);
MOV_IMM_COND(cNE, ed, 0b10, 1); // 0x80000000
WBACK;
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
x87_restoreround(dyn, ninst, u8);
break;
case 5:
@ -281,7 +281,7 @@ uintptr_t dynarecDB(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
STRH_IMM8(x14, ed, 8);
} else {
if(box86_x87_no80bits) {
v1 = x87_do_push(dyn, ninst, x1, NEON_CACHE_ST_D);
X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, NEON_CACHE_ST_D);
parity = getedparity(dyn, ninst, addr, nextop, 3);
if (parity) {
addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 1023, 3, 0, NULL);
@ -298,10 +298,10 @@ uintptr_t dynarecDB(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
if(ed!=x1) {
MOV_REG(x1, ed);
}
x87_do_push_empty(dyn, ninst, x3);
X87_PUSH_OR_FAIL_empty( , dyn, ninst, x3);
CALL(arm_fld, -1, 0);
#else
v1 = x87_do_push(dyn, ninst, x2, NEON_CACHE_ST_D);
X87_PUSH_OR_FAIL(v1, dyn, ninst, x2, NEON_CACHE_ST_D);
// copy 10bytes of *ED to STld(0)
LDR_IMM9(x3, xEmu, offsetof(x86emu_t, top));
int a = -dyn->n.x87stack;
@ -407,7 +407,7 @@ uintptr_t dynarecDB(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
MARK2;
#endif
}
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
default:
DEFAULT;

View File

@ -115,7 +115,7 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
VCMP_F64(v1, v2);
}
FCOM(x1, x2);
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
case 0xE0:
case 0xE1:
@ -276,7 +276,7 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
}
VCMP_F64(v1, d1);
FCOM(x1, x2);
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
case 4:
INST_NAME("FSUB ST0, double[ED]");

View File

@ -49,7 +49,7 @@ uintptr_t dynarecDD(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
#if 1
if((nextop&7)==0 && PK(0)==0xD9 && PK(1)==0xF7) {
MESSAGE(LOG_DUMP, "Hack for FFREE ST0 / FINCSTP\n");
x87_do_pop(dyn, ninst, x1);
X87_POP_OR_FAIL(dyn, ninst, x1);
addr+=2;
SKIPTEST(x1);
} else
@ -80,7 +80,7 @@ uintptr_t dynarecDD(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
break;
case 0xD8:
INST_NAME("FSTP ST0, ST0");
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
case 0xD9:
case 0xDA:
@ -92,7 +92,7 @@ uintptr_t dynarecDD(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
INST_NAME("FSTP ST0, STx");
// copy the cache value for st0 to stx
x87_swapreg(dyn, ninst, x1, x2, 0, nextop&7);
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
case 0xE0:
@ -130,7 +130,7 @@ uintptr_t dynarecDD(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
VCMP_F64(v1, v2);
}
FCOM(x1, x2);
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
case 0xC8:
@ -164,7 +164,7 @@ uintptr_t dynarecDD(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
switch((nextop>>3)&7) {
case 0:
INST_NAME("FLD double");
v1 = x87_do_push(dyn, ninst, x1, NEON_CACHE_ST_D);
X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, NEON_CACHE_ST_D);
parity = getedparity(dyn, ninst, addr, nextop, 3);
if (parity) {
addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 1023, 3, 0, NULL);
@ -182,7 +182,7 @@ uintptr_t dynarecDD(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, 0, NULL);
if(ed!=x1) {MOV_REG(x1, ed);}
CALL(arm_fistt64, -1, 0);
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
case 2:
INST_NAME("FST double");
@ -211,7 +211,7 @@ uintptr_t dynarecDD(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
STR_IMM9(x2, ed, fixedaddress);
STR_IMM9(x3, ed, fixedaddress+4);
}
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
case 4:
INST_NAME("FRSTOR m108byte");

View File

@ -54,7 +54,7 @@ uintptr_t dynarecDE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
}
if(!box86_dynarec_fastround)
x87_restoreround(dyn, ninst, u8);
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
case 0xC8:
case 0xC9:
@ -76,7 +76,7 @@ uintptr_t dynarecDE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
}
if(!box86_dynarec_fastround)
x87_restoreround(dyn, ninst, u8);
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
case 0xD0:
case 0xD1:
@ -95,7 +95,7 @@ uintptr_t dynarecDE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
VCMP_F64(v1, v2);
}
FCOM(x1, x2);
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
case 0xD9:
@ -108,8 +108,8 @@ uintptr_t dynarecDE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
VCMP_F64(v1, v2);
}
FCOM(x1, x2);
x87_do_pop(dyn, ninst, x3);
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
case 0xE0:
case 0xE1:
@ -131,7 +131,7 @@ uintptr_t dynarecDE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
}
if(!box86_dynarec_fastround)
x87_restoreround(dyn, ninst, u8);
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
case 0xE8:
case 0xE9:
@ -153,7 +153,7 @@ uintptr_t dynarecDE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
}
if(!box86_dynarec_fastround)
x87_restoreround(dyn, ninst, u8);
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
case 0xF0:
case 0xF1:
@ -175,7 +175,7 @@ uintptr_t dynarecDE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
}
if(!box86_dynarec_fastround)
x87_restoreround(dyn, ninst, u8);
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
case 0xF8:
case 0xF9:
@ -220,7 +220,7 @@ uintptr_t dynarecDE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
}
if(!box86_dynarec_fastround || !box86_dynarec_fastnan)
VMSR(x14); // restore fpscr
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
case 0xD8:

View File

@ -53,7 +53,7 @@ uintptr_t dynarecDF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
case 0xC7:
INST_NAME("FFREEP STx");
// not handling Tag...
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
case 0xE0:
@ -91,7 +91,7 @@ uintptr_t dynarecDF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
VCMP_F64(v1, v2);
}
FCOMI(x1, x2);
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
case 0xF0:
case 0xF1:
@ -112,7 +112,7 @@ uintptr_t dynarecDF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
VCMP_F64(v1, v2);
}
FCOMI(x1, x2);
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
case 0xC8:
@ -161,7 +161,7 @@ uintptr_t dynarecDF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
switch((nextop>>3)&7) {
case 0:
INST_NAME("FILD ST0, Ew");
v1 = x87_do_push(dyn, ninst, x1, box86_dynarec_x87double?NEON_CACHE_ST_D:NEON_CACHE_ST_F);
X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, box86_dynarec_x87double?NEON_CACHE_ST_D:NEON_CACHE_ST_F);
addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 255, 0, 0, NULL);
LDRSH_IMM8(x1, wback, fixedaddress);
if(ST_IS_F(0)) {
@ -197,7 +197,7 @@ uintptr_t dynarecDF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
CMPS_REG_LSL_IMM5_COND(cEQ, ed, x3, 0);
MOVW_COND(cNE, x3, 0x8000); // saturated
STRH_IMM8(x3, wback, fixedaddress);
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
VMSR(x14);
break;
case 2:
@ -242,20 +242,20 @@ uintptr_t dynarecDF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
CMPS_REG_LSL_IMM5_COND(cEQ, ed, x3, 0);
MOVW_COND(cNE, x3, 0x8000); // saturated
STRH_IMM8(x3, wback, fixedaddress);
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
x87_restoreround(dyn, ninst, u8);
break;
case 4:
INST_NAME("FBLD ST0, tbytes");
MESSAGE(LOG_DUMP, "Need Optimization\n");
x87_do_push_empty(dyn, ninst, x1);
X87_PUSH_EMPTY_OR_FAIL(dyn, ninst, x1);
addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, 0, NULL);
if(ed!=x1) {MOV_REG(x1, ed);}
CALL(fpu_fbld, -1, 0);
break;
case 5:
INST_NAME("FILD ST0, i64");
v1 = x87_do_push(dyn, ninst, x1, NEON_CACHE_ST_D);
X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, NEON_CACHE_ST_D);
v2 = fpu_get_scratch_double(dyn);
s0 = fpu_get_scratch_single(dyn);
parity = getedparity(dyn, ninst, addr, nextop, 3);
@ -315,7 +315,7 @@ uintptr_t dynarecDF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, 0, NULL);
if(ed!=x1) {MOV_REG(x1, ed);}
CALL(fpu_fbst, -1, 0);
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
case 7: // could be inlined for most things, but is it useful?
INST_NAME("FISTP i64, ST0");
@ -440,7 +440,7 @@ uintptr_t dynarecDF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst,
CALL(arm_fistp64, -1, 0);
#endif
}
x87_do_pop(dyn, ninst, x3);
X87_POP_OR_FAIL(dyn, ninst, x3);
break;
default:
DEFAULT;

View File

@ -487,13 +487,18 @@ void fpu_free_reg_quad(dynarec_arm_t* dyn, int reg)
dyn->n.neoncache[i+1].v = 0;
}
// Reset fpu regs counter
// Reset the NEON register allocator bookkeeping of a neoncache:
// no fpu register allocated, all 24 slots unused and cleared.
static void fpu_reset_reg_neoncache(neoncache_t* n)
{
    int slot = 24;
    n->fpu_reg = 0;
    while (slot--) {
        n->neoncache[slot].v = 0;
        n->fpuused[slot] = 0;
    }
}
void fpu_reset_reg(dynarec_arm_t* dyn)
{
dyn->n.fpu_reg = 0;
for (int i=0; i<24; ++i) {
dyn->n.fpuused[i]=0;
dyn->n.neoncache[i].v = 0;
}
fpu_reset_reg_neoncache(&dyn->n);
}
int neoncache_get_st(dynarec_arm_t* dyn, int ninst, int a)
@ -1042,6 +1047,54 @@ void print_opcode(dynarec_arm_t* dyn, int ninst, uint32_t opcode)
dynarec_log(LOG_NONE, "\t%08x\t%s\n", opcode, arm_print(opcode));
}
// Reset the x87 tracking state of a neoncache: forget the cached ST regs,
// zero the virtual-stack counters/flags, and drop ST-typed entries from the
// NEON cache (other cache types are left untouched).
static void x87_reset(neoncache_t* n)
{
    int i = 8;
    while (i--)
        n->x87cache[i] = -1;
    n->x87stack = 0;
    n->stack = 0;
    n->stack_next = 0;
    n->stack_pop = 0;
    n->stack_push = 0;
    n->combined1 = 0;
    n->combined2 = 0;
    n->swapped = 0;
    n->barrier = 0;
    n->pushed = 0;
    n->poped = 0;
    for (int j=23; j>=0; --j)
        if ((n->neoncache[j].t == NEON_CACHE_ST_D) || (n->neoncache[j].t == NEON_CACHE_ST_F))
            n->neoncache[j].v = 0;
}
// Forget all cached MMX registers in the neoncache (-1 == not cached).
static void mmx_reset(neoncache_t* n)
{
    int slot = 8;
    n->mmxcount = 0;
    while (slot--)
        n->mmxcache[slot] = -1;
}
// Invalidate every SSE cache slot in the neoncache (-1 == not cached).
static void sse_reset(neoncache_t* n)
{
    for (int slot=7; slot>=0; --slot)
        n->ssecache[slot].v = -1;
}
// Full FPU-state reset for the dynarec's live neoncache: x87, MMX and SSE
// tracking, then the NEON register allocator bookkeeping.
void fpu_reset(dynarec_arm_t* dyn)
{
    neoncache_t* cache = &dyn->n;
    x87_reset(cache);
    mmx_reset(cache);
    sse_reset(cache);
    fpu_reset_reg(dyn);
}
// Same as fpu_reset, but applied to the per-instruction neoncache snapshot
// of instruction ninst instead of the dynarec's live one.
void fpu_reset_ninst(dynarec_arm_t* dyn, int ninst)
{
    neoncache_t* cache = &dyn->insts[ninst].n;
    x87_reset(cache);
    mmx_reset(cache);
    sse_reset(cache);
    fpu_reset_reg_neoncache(cache);
}
int fpu_is_st_freed(dynarec_arm_t* dyn, int ninst, int st)
{
return (dyn->n.tags&(0b11<<(st*2)))?1:0;

View File

@ -101,6 +101,10 @@ const char* getCacheName(int t, int n);
void inst_name_pass3(dynarec_arm_t* dyn, int ninst, const char* name);
void print_opcode(dynarec_arm_t* dyn, int ninst, uint32_t opcode);
// reset the cache
void fpu_reset(dynarec_arm_t* dyn);
void fpu_reset_ninst(dynarec_arm_t* dyn, int ninst);
// is st freed
int fpu_is_st_freed(dynarec_arm_t* dyn, int ninst, int st);
#endif //__DYNAREC_ARM_FUNCTIONS_H__

View File

@ -295,11 +295,10 @@ void ret_to_epilog(dynarec_arm_t* dyn, int ninst)
// pop the actual return address for ARM stack
LDM(xSP, (1<<x2)|(1<<x3));
CMPS_REG_LSL_IMM5(x3, xEIP, 0); // is it the right address?
BLcond(cEQ, x2);
BXcond(cEQ, x2);
// not the correct return address, regular jump, but purge the stack first, it's unsync now...
CMPS_IMM8(x2, 0); // that was already the top of the stack...
LDR_IMM9_COND(cNE, xSP, xEmu, offsetof(x86emu_t, xSPSave));
SUB_IMM8(xSP, xSP, 16);
LDR_IMM9_COND(cNE, xSP, xEmu, offsetof(x86emu_t, xSPSave)); // load pointer only if not already on top
}
MOV32(x2, getJumpTable());
MOV_REG_LSR_IMM5(x3, xEIP, JMPTABL_SHIFT);
@ -328,11 +327,10 @@ void retn_to_epilog(dynarec_arm_t* dyn, int ninst, int n)
// pop the actual return address for ARM stack
LDM(xSP, (1<<x2)|(1<<x3));
CMPS_REG_LSL_IMM5(x3, xEIP, 0); // is it the right address?
BLcond(cEQ, x2);
BXcond(cEQ, x2);
// not the correct return address, regular jump, but purge the stack first, it's unsync now...
CMPS_IMM8(x2, 0); // that was already the top of the stack...
LDR_IMM9_COND(cNE, xSP, xEmu, offsetof(x86emu_t, xSPSave));
SUB_IMM8(xSP, xSP, 16);
}
MOV32(x2, getJumpTable());
MOV_REG_LSR_IMM5(x3, xEIP, JMPTABL_SHIFT);
@ -603,26 +601,6 @@ void grab_fsdata(dynarec_arm_t* dyn, uintptr_t addr, int ninst, int reg)
}
// x87 stuffs
// Reset the dynarec's x87 tracking: forget cached ST regs, clear the
// virtual-stack counters/flags, and drop ST-typed entries from the neoncache.
static void x87_reset(dynarec_arm_t* dyn)
{
    int i = 8;
    while (i--)
        dyn->n.x87cache[i] = -1;
    dyn->n.x87stack = 0;
    dyn->n.stack = 0;
    dyn->n.stack_next = 0;
    dyn->n.stack_pop = 0;
    dyn->n.stack_push = 0;
    dyn->n.combined1 = 0;
    dyn->n.combined2 = 0;
    dyn->n.swapped = 0;
    dyn->n.barrier = 0;
    dyn->n.pushed = 0;
    dyn->n.poped = 0;
    for (int j=23; j>=0; --j)
        if ((dyn->n.neoncache[j].t == NEON_CACHE_ST_D) || (dyn->n.neoncache[j].t == NEON_CACHE_ST_F))
            dyn->n.neoncache[j].v = 0;
}
void x87_stackcount(dynarec_arm_t* dyn, int ninst, int scratch)
{
if(!dyn->n.x87stack)
@ -1383,12 +1361,6 @@ void x87_restoreround(dynarec_arm_t* dyn, int ninst, int s1)
}
// MMX helpers
// Forget all cached MMX registers (-1 == not cached).
static void mmx_reset(dynarec_arm_t* dyn)
{
    int slot = 8;
    dyn->n.mmxcount = 0;
    while (slot--)
        dyn->n.mmxcache[slot] = -1;
}
static int isx87Empty(dynarec_arm_t* dyn)
{
for (int i=0; i<8; ++i)
@ -1470,11 +1442,6 @@ static void mmx_reflectcache(dynarec_arm_t* dyn, int ninst, int s1)
// SSE / SSE2 helpers
// Invalidate every SSE cache slot (-1 == not cached).
static void sse_reset(dynarec_arm_t* dyn)
{
    for (int slot=7; slot>=0; --slot)
        dyn->n.ssecache[slot].v = -1;
}
// get neon register for a SSE reg, create the entry if needed
int sse_get_reg(dynarec_arm_t* dyn, int ninst, int s1, int a, int forwrite)
{
@ -2099,14 +2066,6 @@ void fpu_unreflectcache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3)
x87_unreflectcache(dyn, ninst, s1, s2, s3);
}
// Full FPU-state reset for the dynarec: x87, MMX and SSE tracking,
// then the NEON register allocator bookkeeping.
void fpu_reset(dynarec_arm_t* dyn)
{
    x87_reset(dyn);
    mmx_reset(dyn);
    sse_reset(dyn);
    fpu_reset_reg(dyn);
}
// get the single reg that from the double "reg" (so Dx[idx])
int fpu_get_single_reg(dynarec_arm_t* dyn, int ninst, int reg, int idx)
{

View File

@ -343,6 +343,37 @@
} \
#if STEP == 0
// Pass 0: stack sanity is not tracked yet, forward straight to the helpers.
#define X87_PUSH_OR_FAIL(var, dyn, ninst, scratch, t) var = x87_do_push(dyn, ninst, scratch, t)
#define X87_PUSH_EMPTY_OR_FAIL(dyn, ninst, scratch) x87_do_push_empty(dyn, ninst, scratch)
#define X87_POP_OR_FAIL(dyn, ninst, scratch) x87_do_pop(dyn, ninst, scratch)
#else
// Later passes: detect an x87 virtual-stack overflow/underflow, log it,
// flag the block as aborted (dyn->abort) and bail out of the opcode handler
// by returning addr.
// Each macro is wrapped in do { } while(0) so it expands to exactly one
// statement: the original if-plus-trailing-statement form would only guard
// the if under an un-braced `if (cond) MACRO(...);` at a call site and
// invite dangling-else misparses (CERT PRE10-C).
#define X87_PUSH_OR_FAIL(var, dyn, ninst, scratch, t) \
do { \
    if ((dyn->n.x87stack==8) || (dyn->n.pushed==8)) { \
        if(box86_dynarec_dump) dynarec_log(LOG_NONE, " Warning, suspicious x87 Push, stack=%d/%d on inst %d\n", dyn->n.x87stack, dyn->n.pushed, ninst); \
        dyn->abort = 1; \
        return addr; \
    } \
    var = x87_do_push(dyn, ninst, scratch, t); \
} while (0)
#define X87_PUSH_EMPTY_OR_FAIL(dyn, ninst, scratch) \
do { \
    if ((dyn->n.x87stack==8) || (dyn->n.pushed==8)) { \
        if(box86_dynarec_dump) dynarec_log(LOG_NONE, " Warning, suspicious x87 Push, stack=%d/%d on inst %d\n", dyn->n.x87stack, dyn->n.pushed, ninst); \
        dyn->abort = 1; \
        return addr; \
    } \
    x87_do_push_empty(dyn, ninst, scratch); \
} while (0)
#define X87_POP_OR_FAIL(dyn, ninst, scratch) \
do { \
    if ((dyn->n.x87stack==-8) || (dyn->n.poped==8)) { \
        if(box86_dynarec_dump) dynarec_log(LOG_NONE, " Warning, suspicious x87 Pop, stack=%d/%d on inst %d\n", dyn->n.x87stack, dyn->n.poped, ninst); \
        dyn->abort = 1; \
        return addr; \
    } \
    x87_do_pop(dyn, ninst, scratch); \
} while (0)
#endif
#define SET_DFNONE(S) if(!dyn->f.dfnone) {MOVW(S, d_none); STR_IMM9(S, xEmu, offsetof(x86emu_t, df)); dyn->f.dfnone=1;}
#define SET_DF(S, N) \
if(N) { \
@ -590,7 +621,6 @@ void* arm_next(x86emu_t* emu, uintptr_t addr);
#define fpu_pushcache STEPNAME(fpu_pushcache)
#define fpu_popcache STEPNAME(fpu_popcache)
#define fpu_reset STEPNAME(fpu_reset)
#define fpu_reset_cache STEPNAME(fpu_reset_cache)
#define fpu_propagate_stack STEPNAME(fpu_propagate_stack)
#define fpu_purgecache STEPNAME(fpu_purgecache)
@ -805,8 +835,6 @@ void sse_forget_reg(dynarec_arm_t* dyn, int ninst, int a, int s1);
int sse_reflect_reg(dynarec_arm_t* dyn, int ninst, int a, int s1);
// common coproc helpers
// reset the cache
void fpu_reset(dynarec_arm_t* dyn);
// reset the cache with n
void fpu_reset_cache(dynarec_arm_t* dyn, int ninst, int reset_n);
// propagate stack state

View File

@ -49,6 +49,9 @@ uintptr_t arm_pass(dynarec_arm_t* dyn, uintptr_t addr)
dyn->forward_to = 0;
dyn->forward_size = 0;
dyn->forward_ninst = 0;
#if STEP == 0
memset(&dyn->insts[ninst], 0, sizeof(instruction_arm_t));
#endif
fpu_reset(dyn);
int reset_n = -1;
int stopblock = 2+(FindElfAddress(my_context, addr)?0:1); // if block is in elf_memory, it can be extended with bligblocks==2, else it needs 3 // ok, go now
@ -156,8 +159,8 @@ uintptr_t arm_pass(dynarec_arm_t* dyn, uintptr_t addr)
ok = 1;
// we use the 1st predecessor here
int ii = ninst+1;
if(ii<dyn->size && !dyn->insts[ii].pred_sz) {
while(ii<dyn->size && (!dyn->insts[ii].pred_sz || (dyn->insts[ii].pred_sz==1 && dyn->insts[ii].pred[0]==ii-1))) {
if(ii<dyn->size && !dyn->insts[ii].x86.alive) {
while(ii<dyn->size && !dyn->insts[ii].x86.alive) {
// may need to skip opcodes to advance
++ninst;
NEW_INST;
@ -189,7 +192,7 @@ uintptr_t arm_pass(dynarec_arm_t* dyn, uintptr_t addr)
if(dyn->forward_to == addr && !need_epilog && ok>=0) {
// we made it!
reset_n = get_first_jump(dyn, addr);
if(box86_dynarec_dump) dynarec_log(LOG_NONE, "Forward extend block for %d bytes %s%p -> %p\n", dyn->forward_to-dyn->forward, dyn->insts[dyn->forward_ninst].x86.has_callret?"(opt. call) ":"", (void*)dyn->forward, (void*)dyn->forward_to);
if(box86_dynarec_dump) dynarec_log(LOG_NONE, "Forward extend block for %d bytes %s%p -> %p (ninst %d - %d)\n", dyn->forward_to-dyn->forward, dyn->insts[dyn->forward_ninst].x86.has_callret?"(opt. call) ":"", (void*)dyn->forward, (void*)dyn->forward_to, reset_n, ninst);
if(dyn->insts[dyn->forward_ninst].x86.has_callret && !dyn->insts[dyn->forward_ninst].x86.has_next)
dyn->insts[dyn->forward_ninst].x86.has_next = 1; // this block actually continue
dyn->forward = 0;
@ -197,7 +200,7 @@ uintptr_t arm_pass(dynarec_arm_t* dyn, uintptr_t addr)
dyn->forward_size = 0;
dyn->forward_ninst = 0;
ok = 1; // in case it was 0
} else if ((dyn->forward_to < addr) || !ok) {
} else if ((dyn->forward_to < addr) || ok<=0) {
            // something went wrong! rollback
if(box86_dynarec_dump) dynarec_log(LOG_NONE, "Could not forward extend block for %d bytes %p -> %p\n", dyn->forward_to-dyn->forward, (void*)dyn->forward, (void*)dyn->forward_to);
ok = 0;
@ -272,6 +275,7 @@ uintptr_t arm_pass(dynarec_arm_t* dyn, uintptr_t addr)
reset_n = -2;
++ninst;
#if STEP == 0
memset(&dyn->insts[ninst], 0, sizeof(instruction_arm_t));
if(ok && (((box86_dynarec_bigblock<stopblock) && !isJumpTableDefault((void*)addr))
|| (addr>=box86_nodynarec_start && addr<box86_nodynarec_end)))
#else

View File

@ -22,11 +22,6 @@
#define SET_HASCALLRET() dyn->insts[ninst].x86.has_callret = 1
#define NEW_INST \
++dyn->size; \
if(dyn->size+3>=dyn->cap) { \
dyn->insts = (instruction_arm_t*)dynaRealloc(dyn->insts, sizeof(instruction_arm_t)*dyn->cap*2);\
memset(&dyn->insts[dyn->cap], 0, sizeof(instruction_arm_t)*dyn->cap); \
dyn->cap *= 2; \
} \
dyn->insts[ninst].x86.addr = ip; \
dyn->n.combined1 = dyn->n.combined2 = 0;\
dyn->n.swapped = 0; dyn->n.barrier = 0; \

View File

@ -114,6 +114,7 @@ typedef struct dynarec_arm_s {
int32_t forward_size; // size at the forward point
int forward_ninst; // ninst at the forward point
uint8_t always_test;
uint8_t abort; // abort the creation of the block
} dynarec_arm_t;
void add_next(dynarec_arm_t *dyn, uintptr_t addr);

View File

@ -37,6 +37,7 @@ typedef struct instruction_x86_s {
uint8_t jmp_cond:1; // 1 if conditional jump
uint8_t has_next:1; // does this opcode can continue to the next?
uint8_t has_callret:1; // this instruction have an optimised call setup
uint8_t alive:1; // this opcode gets executed (0 if dead code in that block)
uint8_t barrier; // next instruction is a jump point, so no optim allowed
uint8_t barrier_next; // next instruction needs a barrier
uint8_t state_flags;// One of SF_XXX state

View File

@ -62,9 +62,9 @@ typedef struct x86emu_s {
mmx87_regs_t mmx[8];
uint32_t top; // top is part of sw, but it's faster to have it separately
int fpu_stack;
uint32_t fpu_tags; // tags for the x87 regs, stacked, only on a 16bits anyway
fpu_ld_t fpu_ld[8]; // for long double emulation / 80bits fld fst
fpu_ll_t fpu_ll[8]; // for 64bits fild / fist sequence
uint32_t fpu_tags; // tags for the x87 regs, stacked, only on a 16bits anyway
// sse
sse_regs_t xmm[8];
mmxcontrol_t mxcsr;

View File

@ -5,6 +5,8 @@ typedef struct dynablock_s dynablock_t;
typedef struct x86emu_s x86emu_t;
typedef struct instsize_s instsize_t;
#define MAX_INSTS 32760
void addInst(instsize_t* insts, size_t* size, int x86_size, int arm_size);
void CancelBlock();