diff --git a/CMakeLists.txt b/CMakeLists.txt index d0abfdba..b252457d 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1007,5 +1007,12 @@ add_test(sse4_2 ${CMAKE_COMMAND} -D TEST_PROGRAM=${CMAKE_BINARY_DIR}/${BOX86} -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests/test25 -D TEST_OUTPUT=tmpfile25.txt -D TEST_REFERENCE=${CMAKE_SOURCE_DIR}/tests/ref25.txt -P ${CMAKE_SOURCE_DIR}/runTest.cmake ) + +add_test(fpu_rounding ${CMAKE_COMMAND} -D TEST_PROGRAM=${CMAKE_BINARY_DIR}/${BOX86} + -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests/test26 -D TEST_OUTPUT=tmpfile26.txt + -D TEST_REFERENCE=${CMAKE_SOURCE_DIR}/tests/ref26.txt + -P ${CMAKE_SOURCE_DIR}/runTest.cmake ) + +set_tests_properties(fpu_rounding PROPERTIES ENVIRONMENT "BOX86_DYNAREC_FASTROUND=0") endif(BOX86LIB) diff --git a/docs/USAGE.md b/docs/USAGE.md index 24aaed39..106c5968 100755 --- a/docs/USAGE.md +++ b/docs/USAGE.md @@ -162,8 +162,8 @@ Enable/Disable generation of -NAN #### BOX86_DYNAREC_FASTROUND * Enable/Disable generation of precise x86 rounding -* 0 : Generate float/double -> int rounding like on x86 -* 1 : Don't do anything special with edge case Rounding, to go as fast as possible (no INF/NAN/Overflow -> MIN_INT conversion) (faster, Default) +* 0 : Generate float/double -> int rounding and use the current rounding mode for float/double computation like on x86 +* 1 : Don't do anything special with edge case rounding, to go as fast as possible (no INF/NAN/Overflow -> MIN_INT conversion, and no rounding-mode change) (faster, Default) #### BOX86_DYNAREC_SAFEFLAGS * Handling of flags on CALL/RET opcodes diff --git a/src/dynarec/arm_emitter.h b/src/dynarec/arm_emitter.h index dde75b2c..9e5244cb 100755 --- a/src/dynarec/arm_emitter.h +++ b/src/dynarec/arm_emitter.h @@ -192,6 +192,9 @@ Op is 20-27 // and dst, src1, #imm ror rot*2 #define AND_IMM8_ROR(dst, src, imm8, rot) \ EMIT(0xe2000000 | ((dst) << 12) | ((src) << 16) | ((rot)<<8) | brIMM(imm8) ) +// and.c dst, src, #(imm8) +#define AND_IMM8_COND(cond, dst, src, imm8) \ + EMIT((cond) | 0x02000000 | ((dst) << 12) | ((src) << 16) | brIMM(imm8) ) // and.s dst, src, #(imm8) #define ANDS_IMM8(dst, src, imm8) \ EMIT(0xe2100000 | ((dst) << 12) | ((src) << 16) | brIMM(imm8) ) diff --git a/src/dynarec/dynarec_arm_d8.c b/src/dynarec/dynarec_arm_d8.c index 7cb6844a..78f96ee4 100755 --- a/src/dynarec/dynarec_arm_d8.c +++ b/src/dynarec/dynarec_arm_d8.c @@ -33,6 +33,7 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, int d1; int fixedaddress; int parity; + uint8_t u8; MAYUSE(d1); MAYUSE(s0); @@ -52,11 +53,15 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, INST_NAME("FADD ST0, STx"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box86_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x14); if(ST_IS_F(0)) { VADD_F32(v1, v1, v2); } else { VADD_F64(v1, v1, v2); } + if(!box86_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 0xC8: case 0xC9: @@ -69,11 +74,15 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, INST_NAME("FMUL ST0, STx"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box86_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x14); if(ST_IS_F(0)) { VMUL_F32(v1, v1, v2); } else { VMUL_F64(v1, v1, v2); } + if(!box86_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 0xD0:
case 0xD1: @@ -123,11 +132,15 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, INST_NAME("FSUB ST0, STx"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box86_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x14); if(ST_IS_F(0)) { VSUB_F32(v1, v1, v2); } else { VSUB_F64(v1, v1, v2); } + if(!box86_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 0xE8: case 0xE9: @@ -140,11 +153,15 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, INST_NAME("FSUBR ST0, STx"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box86_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x14); if(ST_IS_F(0)) { VSUB_F32(v1, v2, v1); } else { VSUB_F64(v1, v2, v1); } + if(!box86_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 0xF0: case 0xF1: @@ -157,11 +174,15 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, INST_NAME("FDIV ST0, STx"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box86_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x14); if(ST_IS_F(0)) { VDIV_F32(v1, v1, v2); } else { VDIV_F64(v1, v1, v2); } + if(!box86_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 0xF8: case 0xF9: @@ -174,11 +195,15 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, INST_NAME("FDIVR ST0, STx"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box86_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x14); if(ST_IS_F(0)) { VDIV_F32(v1, v2, v1); } else { VDIV_F64(v1, v2, v1); } + if(!box86_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; default: @@ -196,12 +221,16 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, GETED; VMOVtoV(s0, ed); } + if(!box86_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x14); if(ST_IS_F(0)) { VADD_F32(v1, v1, s0); } else { VCVT_F64_F32(d1, s0); VADD_F64(v1, v1, d1); } + if(!box86_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 1: INST_NAME("FMUL ST0, float[ED]"); @@ -216,12 +245,16 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, GETED; VMOVtoV(s0, ed); } + if(!box86_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x14); if(ST_IS_F(0)) { VMUL_F32(v1, v1, s0); } else { VCVT_F64_F32(d1, s0); VMUL_F64(v1, v1, d1); } + if(!box86_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 2: INST_NAME("FCOM ST0, float[ED]"); @@ -279,12 +312,16 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, GETED; VMOVtoV(s0, ed); } + if(!box86_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x14); if(ST_IS_F(0)) { VSUB_F32(v1, v1, s0); } else { VCVT_F64_F32(d1, s0); VSUB_F64(v1, v1, d1); } + if(!box86_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 5: INST_NAME("FSUBR ST0, float[ED]"); @@ -299,12 +336,16 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, GETED; VMOVtoV(s0, ed); } + if(!box86_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x14); if(ST_IS_F(0)) { VSUB_F32(v1, s0, v1); 
} else { VCVT_F64_F32(d1, s0); VSUB_F64(v1, d1, v1); } + if(!box86_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 6: INST_NAME("FDIV ST0, float[ED]"); @@ -319,12 +360,16 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, GETED; VMOVtoV(s0, ed); } + if(!box86_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x14); if(ST_IS_F(0)) { VDIV_F32(v1, v1, s0); } else { VCVT_F64_F32(d1, s0); VDIV_F64(v1, v1, d1); } + if(!box86_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 7: INST_NAME("FDIVR ST0, float[ED]"); @@ -339,12 +384,16 @@ uintptr_t dynarecD8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, GETED; VMOVtoV(s0, ed); } + if(!box86_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x14); if(ST_IS_F(0)) { VDIV_F32(v1, s0, v1); } else { VCVT_F64_F32(d1, s0); VDIV_F64(v1, d1, v1); } + if(!box86_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; default: DEFAULT; diff --git a/src/dynarec/dynarec_arm_d9.c b/src/dynarec/dynarec_arm_d9.c index f54f3cec..819eb993 100755 --- a/src/dynarec/dynarec_arm_d9.c +++ b/src/dynarec/dynarec_arm_d9.c @@ -281,18 +281,20 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, CALL(arm_f2xm1, -1, 0); #else v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D); - //if(ST0.d!=0.0) - // ST0.d = exp2(ST0.d)-1.0; - VMOV_64(0, v1); - CALL_1D(exp2, 0); // return is d0 if((PK(0)==0xD9 && PK(1)==0xE8) && // next inst is FLD1 (PK(2)==0xDE && PK(3)==0xC1)) { MESSAGE(LOG_DUMP, "Hack for fld1 / faddp st1, st0\n"); + VMOV_64(0, v1); + CALL_1D(exp2, 0); // return is d0 VMOV_64(v1, 0); addr+=4; } else { - VMOV_i_64(v1, 0b01110000); // 1.0 - VSUB_F64(v1, 0, v1); + //ST0.d = expm1(LN2 * ST0.d); + MOV32(x2, (&d_ln2)); + VLDR_64(0, x2, 0); + VMUL_F64(0, 0, v1); + CALL_1D(expm1, 0); // return is d0 + VMOV_64(v1, 0); } #endif // should set C1 to 0 @@ -301,6 +303,7 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, INST_NAME("FYL2X"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D); v2 = x87_get_st(dyn, ninst, x1, x2, 1, NEON_CACHE_ST_D); + VMOV_64(0, v1); // prepare call to log2 CALL_1D(log2, 0); VMUL_F64(v2, v2, 0); //ST(1).d = log2(ST0.d)*ST(1).d @@ -311,9 +314,14 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, INST_NAME("FPTAN"); v2 = x87_do_push(dyn, ninst, x1, box86_dynarec_x87double?NEON_CACHE_ST_D:NEON_CACHE_ST_F); v1 = x87_get_st(dyn, ninst, x1, x2, 1, NEON_CACHE_ST_D); + // seems that glibc's tan doesn't follow the rounding direction mode + if(!box86_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x14); VMOV_64(0, v1); // prepare call to tan - CALL_1D(tan, 0); + CALL_1D(tan, box86_dynarec_fastround ? 0 : (1 << u8)); VMOV_64(v1, 0); + if(!box86_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); //emu->sw.f.F87_C2 = 0; //emu->sw.f.F87_C1 = 0; LDRH_IMM8(x1, xEmu, offsetof(x86emu_t, sw)); @@ -338,10 +346,14 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, INST_NAME("FPATAN"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D); v2 = x87_get_st(dyn, ninst, x1, x2, 1, NEON_CACHE_ST_D); + if(!box86_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x14); VMOV_64(0, v2); // prepare call to atan2 VMOV_64(1, v1); - CALL_2D(atan2, 0); + CALL_2D(atan2, box86_dynarec_fastround ? 0 : (1 << u8));
VMOV_64(v2, 0); //ST(1).d = atan2(ST1.d, ST0.d); + if(!box86_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); x87_do_pop(dyn, ninst, x3); // should set C1 to 0 break; @@ -449,32 +461,45 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, INST_NAME("FYL2XP1"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D); v2 = x87_get_st(dyn, ninst, x1, x2, 1, NEON_CACHE_ST_D); - VMOV_i_64(0, 0b01110000); // D0 = 1.0 - VADD_F64(0, 0, v1); // prepare call to log2 - CALL_1D(log2, 0); - VMUL_F64(v2, v2, 0); //ST(1).d = log2(ST0.d + 1.0)*ST(1).d; + + //ST(1).d = (ST(1).d * log1p(ST0.d)) / M_LN2; + VMOV_64(0, v1); // prepare call to log1p + CALL_1D(log1p, 0); + VMUL_F64(v2, v2, 0); + MOV32(x2, (&d_ln2)); + VLDR_64(0, x2, 0); + VDIV_F64(v2, v2, 0); x87_do_pop(dyn, ninst, x3); // should set C1 to 0 break; case 0xFA: INST_NAME("FSQRT"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0); + if(!box86_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x14); if(ST_IS_F(0)) { VSQRT_F32(v1, v1); } else { VSQRT_F64(v1, v1); } + if(!box86_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); // should set C1 to 0 break; case 0xFB: INST_NAME("FSINCOS"); v2 = x87_do_push(dyn, ninst, x3, NEON_CACHE_ST_D); v1 = x87_get_st(dyn, ninst, x1, x2, 1, NEON_CACHE_ST_D); + // seems that the sin and cos functions of glibc don't follow the rounding mode + if(!box86_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x14); VMOV_64(0, v1); - CALL_1D(sin, 0); + CALL_1D(sin, box86_dynarec_fastround ? 0 : (1 << u8)); VSWP(v1, 0); - CALL_1D(cos, 0); // would it be faster to do sqrt(1-sin()²) ??? + CALL_1D(cos, box86_dynarec_fastround ? 0 : (1 << u8)); // would it be faster to do sqrt(1-sin()²) ??? VMOV_64(v2, 0); + if(!box86_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); //emu->sw.f.F87_C2 = 0; C1 too LDRH_IMM8(x1, xEmu, offsetof(x86emu_t, sw)); BFC(x1, 9, 2); //C2 C1 = 0 0 @@ -509,26 +534,32 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D); v2 = x87_get_st(dyn, ninst, x1, x2, 1, NEON_CACHE_ST_D); //if(ST0.d!=0.0) - // ST0.d *= exp2(trunc(ST1.d)); - VCMP_F64_0(v1); - VMRS_APSR(); - B_NEXT(cEQ); - if(!arm_v8) { - VMOV_64(0, v2); - CALL_1DD(trunc, exp2, 0); - } else { - VRINTZ_F64(0, v2); - CALL_1D(exp2, 0); - } - VMUL_F64(v1, v1, 0); + // ST0.d = ldexp(ST0.d, trunc(ST1.d)); + if(!box86_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x14); + s0 = fpu_get_scratch_single(dyn); + // value of s0 = + // 2^31-1 (ST1 >= 2^31), -2^31 (ST1 < -2^31) or int(ST1) (other situations) + VCVT_S32_F64(s0, v2); + VMOVfrV(x2, s0); + VMOV_64(0, v1); + CALL_1DDR(ldexp, x2, x3, box86_dynarec_fastround ? 0 : (1 << u8)); + VMOV_64(v1, 0); + if(!box86_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); // should set C1 to 0 break; case 0xFE: INST_NAME("FSIN"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D); + // seems that glibc's sin doesn't follow the rounding direction mode + if(!box86_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x14); VMOV_64(0, v1); // prepare call to sin - CALL_1D(sin, 0); + CALL_1D(sin, box86_dynarec_fastround ? 0 : (1 << u8));
VMOV_64(v1, 0); + if(!box86_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); //emu->sw.f.F87_C2 = 0; C1 too LDRH_IMM8(x1, xEmu, offsetof(x86emu_t, sw)); BFC(x1, 9, 2); //C2 C1 = 0 0 @@ -537,9 +568,14 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, case 0xFF: INST_NAME("FCOS"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D); + // seems that glibc's cos doesn't follow the rounding direction mode + if(!box86_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x14); VMOV_64(0, v1); // prepare call to cos - CALL_1D(cos, 0); + CALL_1D(cos, box86_dynarec_fastround ? 0 : (1 << u8)); VMOV_64(v1, 0); + if(!box86_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); //emu->sw.f.F87_C2 = 0; C1 too LDRH_IMM8(x1, xEmu, offsetof(x86emu_t, sw)); BFC(x1, 9, 2); //C2 C1 = 0 0 @@ -598,7 +634,11 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, s0 = v1; else { s0 = fpu_get_scratch_single(dyn); + if(!box86_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x14); VCVT_F32_F64(s0, v1); + if(!box86_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); } parity = getedparity(dyn, ninst, addr, nextop, 2); if(parity) { @@ -617,7 +657,11 @@ uintptr_t dynarecD9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, s0 = v1; else { s0 = fpu_get_scratch_single(dyn); + if(!box86_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x14); VCVT_F32_F64(s0, v1); + if(!box86_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); } parity = getedparity(dyn, ninst, addr, nextop, 2); if(parity) { diff --git a/src/dynarec/dynarec_arm_da.c b/src/dynarec/dynarec_arm_da.c index 3a4f44d8..a2b27957 100755 --- a/src/dynarec/dynarec_arm_da.c +++ b/src/dynarec/dynarec_arm_da.c @@ -33,6 +33,7 @@ uintptr_t dynarecDA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, int d0; int s0; int fixedaddress; + uint8_t u8; MAYUSE(s0); MAYUSE(d0); @@ -155,7 +156,11 @@ uintptr_t dynarecDA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, s0 = fpu_get_scratch_single(dyn); VMOVtoV(s0, ed); VCVT_F64_S32(d0, s0); + if(!box86_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x14); VADD_F64(v1, v1, d0); + if(!box86_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 1: INST_NAME("FIMUL ST0, Ed"); @@ -165,7 +170,11 @@ uintptr_t dynarecDA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, s0 = fpu_get_scratch_single(dyn); VMOVtoV(s0, ed); VCVT_F64_S32(d0, s0); + if(!box86_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x14); VMUL_F64(v1, v1, d0); + if(!box86_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 2: INST_NAME("FICOM ST0, Ed"); @@ -198,7 +207,11 @@ uintptr_t dynarecDA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, s0 = fpu_get_scratch_single(dyn); VMOVtoV(s0, ed); VCVT_F64_S32(d0, s0); + if(!box86_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x14); VSUB_F64(v1, v1, d0); + if(!box86_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 5: INST_NAME("FISUBR ST0, Ed"); @@ -208,7 +221,11 @@ uintptr_t dynarecDA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, s0 = fpu_get_scratch_single(dyn); VMOVtoV(s0, ed); VCVT_F64_S32(d0, s0); + if(!box86_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x14); VSUB_F64(v1, d0, v1); + if(!box86_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 6: INST_NAME("FIDIV ST0, Ed"); @@ -218,7 +235,11 @@ uintptr_t dynarecDA(dynarec_arm_t*
dyn, uintptr_t addr, uintptr_t ip, int ninst, s0 = fpu_get_scratch_single(dyn); VMOVtoV(s0, ed); VCVT_F64_S32(d0, s0); + if(!box86_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x14); VDIV_F64(v1, v1, d0); + if(!box86_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 7: INST_NAME("FIDIVR ST0, Ed"); @@ -228,7 +249,11 @@ uintptr_t dynarecDA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, s0 = fpu_get_scratch_single(dyn); VMOVtoV(s0, ed); VCVT_F64_S32(d0, s0); + if(!box86_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x14); VDIV_F64(v1, d0, v1); + if(!box86_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; } } diff --git a/src/dynarec/dynarec_arm_dc.c b/src/dynarec/dynarec_arm_dc.c index baef4f6b..e9d6e67c 100755 --- a/src/dynarec/dynarec_arm_dc.c +++ b/src/dynarec/dynarec_arm_dc.c @@ -31,6 +31,7 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, int d1; int fixedaddress; int parity; + uint8_t u8; MAYUSE(d1); MAYUSE(v2); @@ -48,11 +49,15 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, INST_NAME("FADD STx, ST0"); v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box86_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x14); if(ST_IS_F(0)) { VADD_F32(v1, v1, v2); } else { VADD_F64(v1, v1, v2); } + if(!box86_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 0xC8: case 0xC9: @@ -65,11 +70,15 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, INST_NAME("FMUL STx, ST0"); v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box86_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x14); if(ST_IS_F(0)) { VMUL_F32(v1, v1, v2); } else { VMUL_F64(v1, v1, v2); } + if(!box86_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 0xD0: case 0xD1: @@ -119,11 +128,15 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, INST_NAME("FSUBR STx, ST0"); v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box86_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x14); if(ST_IS_F(0)) { VSUB_F32(v1, v2, v1); } else { VSUB_F64(v1, v2, v1); } + if(!box86_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 0xE8: case 0xE9: @@ -136,11 +149,15 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, INST_NAME("FSUB STx, ST0"); v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box86_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x14); if(ST_IS_F(0)) { VSUB_F32(v1, v1, v2); } else { VSUB_F64(v1, v1, v2); } + if(!box86_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 0xF0: case 0xF1: @@ -153,11 +170,15 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, INST_NAME("FDIVR STx, ST0"); v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box86_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x14); if(ST_IS_F(0)) { VDIV_F32(v1, v2, v1); } else { VDIV_F64(v1, v2, v1); } + if(!box86_dynarec_fastround) + 
x87_restoreround(dyn, ninst, u8); break; case 0xF8: case 0xF9: @@ -170,11 +191,15 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, INST_NAME("FDIV STx, ST0"); v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v1 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box86_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x14); if(ST_IS_F(0)) { VDIV_F32(v1, v1, v2); } else { VDIV_F64(v1, v1, v2); } + if(!box86_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; default: switch((nextop>>3)&7) { @@ -192,7 +217,11 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, LDR_IMM9(x3, wback, fixedaddress+4); VMOVtoV_D(d1, x2, x3); } + if(!box86_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x14); VADD_F64(v1, v1, d1); + if(!box86_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 1: INST_NAME("FMUL ST0, double[ED]"); @@ -208,7 +237,11 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, LDR_IMM9(x3, wback, fixedaddress+4); VMOVtoV_D(d1, x2, x3); } + if(!box86_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x14); VMUL_F64(v1, v1, d1); + if(!box86_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 2: INST_NAME("FCOM ST0, double[ED]"); @@ -259,7 +292,11 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, LDR_IMM9(x3, wback, fixedaddress+4); VMOVtoV_D(d1, x2, x3); } + if(!box86_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x14); VSUB_F64(v1, v1, d1); + if(!box86_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 5: INST_NAME("FSUBR ST0, double[ED]"); @@ -275,7 +312,11 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, LDR_IMM9(x3, wback, fixedaddress+4); VMOVtoV_D(d1, x2, x3); } + if(!box86_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x14); VSUB_F64(v1, d1, v1); + if(!box86_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 6: INST_NAME("FDIV ST0, double[ED]"); @@ -291,7 +332,11 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, LDR_IMM9(x3, wback, fixedaddress+4); VMOVtoV_D(d1, x2, x3); } + if(!box86_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x14); VDIV_F64(v1, v1, d1); + if(!box86_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; case 7: INST_NAME("FDIVR ST0, double[ED]"); @@ -307,7 +352,11 @@ uintptr_t dynarecDC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, LDR_IMM9(x3, wback, fixedaddress+4); VMOVtoV_D(d1, x2, x3); } + if(!box86_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x14); VDIV_F64(v1, d1, v1); + if(!box86_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); break; } } diff --git a/src/dynarec/dynarec_arm_de.c b/src/dynarec/dynarec_arm_de.c index 28d169e0..af95e710 100755 --- a/src/dynarec/dynarec_arm_de.c +++ b/src/dynarec/dynarec_arm_de.c @@ -7,6 +7,7 @@ #include "debug.h" #include "box86context.h" #include "dynarec.h" +#include "dynarec/arm_emitter.h" #include "emu/x86emu_private.h" #include "emu/x86run_private.h" #include "x86run.h" @@ -27,6 +28,7 @@ uintptr_t dynarecDE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, { uint8_t nextop = F8; int v1, v2; + uint8_t u8; MAYUSE(v2); MAYUSE(v1); @@ -43,11 +45,15 @@ uintptr_t dynarecDE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, INST_NAME("FADDP STx, ST0"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, 
X87_COMBINE(0, nextop&7)); v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box86_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x14); if(ST_IS_F(0)) { VADD_F32(v2, v2, v1); } else { VADD_F64(v2, v2, v1); } + if(!box86_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); x87_do_pop(dyn, ninst, x3); break; case 0xC8: @@ -61,11 +67,15 @@ uintptr_t dynarecDE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, INST_NAME("FMULP STx, ST0"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box86_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x14); if(ST_IS_F(0)) { VMUL_F32(v2, v2, v1); } else { VMUL_F64(v2, v2, v1); } + if(!box86_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); x87_do_pop(dyn, ninst, x3); break; case 0xD0: @@ -112,11 +122,15 @@ uintptr_t dynarecDE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, INST_NAME("FSUBRP STx, ST0"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box86_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x14); if(ST_IS_F(0)) { VSUB_F32(v2, v1, v2); } else { VSUB_F64(v2, v1, v2); } + if(!box86_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); x87_do_pop(dyn, ninst, x3); break; case 0xE8: @@ -130,11 +144,15 @@ uintptr_t dynarecDE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, INST_NAME("FSUBP STx, ST0"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box86_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x14); if(ST_IS_F(0)) { VSUB_F32(v2, v2, v1); } else { VSUB_F64(v2, v2, v1); } + if(!box86_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); x87_do_pop(dyn, ninst, x3); break; case 0xF0: @@ -148,11 +166,15 @@ uintptr_t dynarecDE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, INST_NAME("FDIVRP STx, ST0"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); + if(!box86_dynarec_fastround) + u8 = x87_setround(dyn, ninst, x1, x2, x14); if(ST_IS_F(0)) { VDIV_F32(v2, v1, v2); } else { VDIV_F64(v2, v1, v2); } + if(!box86_dynarec_fastround) + x87_restoreround(dyn, ninst, u8); x87_do_pop(dyn, ninst, x3); break; case 0xF8: @@ -166,11 +188,21 @@ uintptr_t dynarecDE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, INST_NAME("FDIVP STx, ST0"); v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop&7)); v2 = x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_COMBINE(0, nextop&7)); - if(!box86_dynarec_fastnan) { + if(!box86_dynarec_fastround || !box86_dynarec_fastnan) { VMRS(x14); // get fpscr - ORR_IMM8(x3, x14, 0b010, 9); // enable exceptions - BIC_IMM8(x3, x3, 0b10011111, 0); - VMSR(x3); + if(!box86_dynarec_fastnan) { + ORR_IMM8(x3, x14, 0b010, 9); // enable exceptions + BIC_IMM8(x3, x3, 0b10011111, 0); + } else if(!box86_dynarec_fastround) + MOV_REG(x3, x14); + if(!box86_dynarec_fastround){ + LDRH_IMM8(x1, xEmu, offsetof(x86emu_t, cw)); // hopefully cw is not too far for an imm8 + UBFX(x1, x1, 10, 2); // extract round... 
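+ // note: the 2-bit rounding-control encodings differ between x86 and ARM:
+ //       x86 CW RC:       00=nearest, 01=down, 10=up, 11=chop
+ //       ARM FPSCR RMode: 00=nearest, 01=up,   10=down, 11=zero
+ //       so bits 0 and 1 must be swapped before injection into FPSCR[23:22]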
+ UBFX(x2, x1, 1, 1); // swap bits 0 and 1 + BFI(x2, x1, 1, 1); + BFI(x3, x2, 22, 2); // inject new round + } + VMSR(x3); // put new fpscr } if(ST_IS_F(0)) { VDIV_F32(v2, v2, v1); @@ -185,8 +217,9 @@ } else { VNEG_F64_cond(cNE, v2, v2); } - VMSR(x14); // restore fpscr } + if(!box86_dynarec_fastround || !box86_dynarec_fastnan) + VMSR(x14); // restore fpscr x87_do_pop(dyn, ninst, x3); break; diff --git a/src/dynarec/dynarec_arm_helper.c b/src/dynarec/dynarec_arm_helper.c index 52c813f2..f4bc70fb 100755 --- a/src/dynarec/dynarec_arm_helper.c +++ b/src/dynarec/dynarec_arm_helper.c @@ -498,6 +498,53 @@ void call_d(dynarec_arm_t* dyn, int ninst, void* fnc, void* fnc2, int n, int reg } SET_NODF(); } +// call a function with 1 double arg (taking care of the SOFTFP / HARD call) and 1 non-float arg that returns a double +void call_ddr(dynarec_arm_t* dyn, int ninst, void* fnc, void* fnc2, int arg, int reg, int ret, uint32_t mask, int saveflags) +{ + if(ret!=-2 && !mask) { + // the ARM ABI requires the stack to be 8-bytes aligned! + // so, if no mask asked, add one to stay 8-bytes aligned + if(ret!=xFlags) mask=1< r0:r1 + MOV_REG(2, arg); + #else + MOV_REG(0, arg); + #endif + MOV32(reg, (uintptr_t)fnc); + BLX(reg); + if(fnc2) { + #ifdef ARM_SOFTFP + // results are already in r0:r1 for the next call + #endif + MOV32(reg, (uintptr_t)fnc2); + BLX(reg); + } + #ifdef ARM_SOFTFP + POP(xSP, (1<<2) | (1<<3)); + VMOVtoV_64(0, 0, 1); // load r0:r1 to D0 to simulate hardfp + #endif + fpu_popcache(dyn, ninst, reg); + if(ret>=0) { + MOV_REG(ret, 0); + } + if(ret!=-2) { + POP(xSP, (1<= 0) + MOV_REG(s3, s1); BFI(s1, s2, 22, 2); // inject new round VMSR(s1); // put new fpscr return s3; diff --git a/src/dynarec/dynarec_arm_helper.h b/src/dynarec/dynarec_arm_helper.h index e8ea1515..7068d555 100755 --- a/src/dynarec/dynarec_arm_helper.h +++ b/src/dynarec/dynarec_arm_helper.h @@ -262,8 +262,10 @@ #define CALL_1DD(F, F2, M) call_d(dyn, ninst, F, F2, 1, x3, -1, M, 0) // CALL_1D_U64 will use S as scratch. Return value in ret/ret2, 1 ARG in D0 #define CALL_1DR_U64(R, ret, ret2, S, M) call_dr(dyn, ninst, R, 1, S, ret, ret2, M, 1) -// CALL_1D will use S as scratch. Return value in D0, 1 ARG in D0 +// CALL_1DR will use S as scratch. Return value in D0, 1 ARG in D0 #define CALL_1DR(R, S, M) call_dr(dyn, ninst, R, 1, S, -1, -1, M, 0) +// CALL_1DDR will use S as scratch. Return value in D0, 1 ARG in D0, 1 ARG in R0 +#define CALL_1DDR(F, R, S, M) call_ddr(dyn, ninst, F, NULL, R, S, -1, M, 0) // CALL_1RD will use S as scratch. Return value in D0, 1 ARG in R
#define CALL_1RD(F, R, S, M) call_rd(dyn, ninst, F, R, S, M, 0); @@ -463,6 +465,7 @@ void* arm_next(x86emu_t* emu, uintptr_t addr); #define iret_to_epilog STEPNAME(iret_to_epilog_) #define call_c STEPNAME(call_c_) #define call_d STEPNAME(call_d_) +#define call_ddr STEPNAME(call_ddr_) #define call_dr STEPNAME(call_dr_) #define call_rd STEPNAME(call_rd_) #define grab_fsdata STEPNAME(grab_fsdata_) @@ -616,6 +619,7 @@ void iret_to_epilog(dynarec_arm_t* dyn, int ninst); void call_c(dynarec_arm_t* dyn, int ninst, void* fnc, int reg, int ret, uint32_t mask, int saveflags); void call_d(dynarec_arm_t* dyn, int ninst, void* fnc, void* fnc2, int n, int reg, int ret, uint32_t mask, int saveflags); void call_dr(dynarec_arm_t* dyn, int ninst, int reg, int n, int s1, int ret, int ret2, uint32_t mask, int saveflags); +void call_ddr(dynarec_arm_t* dyn, int ninst, void* fnc, void* fnc2, int arg, int reg, int ret, uint32_t mask, int saveflags); void call_rd(dynarec_arm_t* dyn, int ninst, void* fnc, int reg, int s1, uint32_t mask, int saveflags); void grab_fsdata(dynarec_arm_t* dyn, uintptr_t addr, int ninst, int reg); void grab_tlsdata(dynarec_arm_t* dyn, uintptr_t addr, int ninst, int reg); diff --git a/src/emu/x86rund8.c b/src/emu/x86rund8.c index 944f5df3..14263549 100755 --- a/src/emu/x86rund8.c +++ b/src/emu/x86rund8.c @@ -1,3 +1,4 @@ +#include <fenv.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> @@ -11,6 +12,7 @@ #include "x86emu_private.h" #include "x86run_private.h" #include "x87emu_private.h" +#include "x87emu_setround.h" #include "x86primop.h" #include "x86trace.h" #include "box86context.h" @@ -32,6 +34,7 @@ uintptr_t RunD8(x86emu_t *emu, uintptr_t addr) #ifdef TEST_INTERPRETER x86emu_t*emu = test->emu; #endif + int oldround = fpu_setround(emu); nextop = F8; switch (nextop) { @@ -192,9 +195,11 @@ uintptr_t RunD8(x86emu_t *emu, uintptr_t addr) } break; default: + fesetround(oldround); return 0; } } - return addr; + fesetround(oldround); + return addr; } #pragma GCC diagnostic pop diff --git a/src/emu/x86rund9.c b/src/emu/x86rund9.c index 753aa442..2b0775a8 100755 --- a/src/emu/x86rund9.c +++ b/src/emu/x86rund9.c @@ -1,4 +1,5 @@ #define _GNU_SOURCE +#include <fenv.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> @@ -12,6 +13,7 @@ #include "x86emu_private.h" #include "x86run_private.h" #include "x87emu_private.h" +#include "x87emu_setround.h" #include "x86primop.h" #include "x86trace.h" #include "box86context.h" @@ -33,6 +35,7 @@ uintptr_t RunD9(x86emu_t *emu, uintptr_t addr) x86emu_t*emu = test->emu; #endif + int oldround; nextop = F8; switch (nextop) { case 0xC0: @@ -110,7 +113,13 @@ uintptr_t RunD9(x86emu_t *emu, uintptr_t addr) break; case 0xF0: /* F2XM1 */ - ST0.d = exp2(ST0.d) - 1.0; + if (ST0.d == 0) + break; + // Using expm1 instead of exp2(ST0)-1 avoids losing much precision, + // especially when ST0 is close to zero (where the final -1 cancels most significant bits). + // printf("%a, %a\n", LN2 * ST0.d, expm1(LN2 * ST0.d)); + ST0.d = expm1(LN2 * ST0.d); + // = 2^ST0 - 1 + error. (in math)
emu->sw.f.F87_C1 = 0; break; case 0xF1: /* FYL2X */ @@ -119,14 +128,18 @@ uintptr_t RunD9(x86emu_t *emu, uintptr_t addr) emu->sw.f.F87_C1 = 0; break; case 0xF2: /* FPTAN */ + oldround = fpu_setround(emu); ST0.d = tan(ST0.d); + fesetround(oldround); fpu_do_push(emu); ST0.d = 1.0; emu->sw.f.F87_C2 = 0; emu->sw.f.F87_C1 = 0; break; case 0xF3: /* FPATAN */ + oldround = fpu_setround(emu); ST1.d = atan2(ST1.d, ST0.d); + fesetround(oldround); fpu_do_pop(emu); emu->sw.f.F87_C1 = 0; break; @@ -187,17 +200,24 @@ uintptr_t RunD9(x86emu_t *emu, uintptr_t addr) emu->top=(emu->top+1)&7; // this will probably break a few things break; case 0xF9: /* FYL2XP1 */ - ST(1).d *= log2(ST0.d + 1.0); + // Using log1p instead of log2(ST0+1) avoids losing much precision, + // especially when ST0 is close to zero (where the +1 drops most of ST0's significant bits). + ST(1).d = (ST(1).d * log1p(ST0.d)) / M_LN2; + // = ST1 * log2(ST0 + 1) + error. (in math) fpu_do_pop(emu); emu->sw.f.F87_C1 = 0; break; case 0xFA: /* FSQRT */ + oldround = fpu_setround(emu); ST0.d = sqrt(ST0.d); + fesetround(oldround); emu->sw.f.F87_C1 = 0; break; case 0xFB: /* FSINCOS */ fpu_do_push(emu); + oldround = fpu_setround(emu); sincos(ST1.d, &ST1.d, &ST0.d); + fesetround(oldround); emu->sw.f.F87_C2 = 0; emu->sw.f.F87_C1 = 0; break; @@ -206,18 +226,30 @@ uintptr_t RunD9(x86emu_t *emu, uintptr_t addr) emu->sw.f.F87_C1 = 0; break; case 0xFD: /* FSCALE */ - // this could probably be done by just altering the exponant part of the float... - if(ST0.d!=0.0) - ST0.d *= exp2(trunc(ST1.d)); + if (ST1.d > INT32_MAX) + tmp32s = INT32_MAX; + else if (ST1.d < INT32_MIN) + tmp32s = INT32_MIN; + else + tmp32s = ST1.d; + if(ST0.d!=0.0) { + oldround = fpu_setround(emu); + ST0.d = ldexp(ST0.d, tmp32s); + fesetround(oldround); + } emu->sw.f.F87_C1 = 0; break; case 0xFE: /* FSIN */ + oldround = fpu_setround(emu); ST0.d = sin(ST0.d); + fesetround(oldround); emu->sw.f.F87_C2 = 0; emu->sw.f.F87_C1 = 0; break; case 0xFF: /* FCOS */ + oldround = fpu_setround(emu); ST0.d = cos(ST0.d); + fesetround(oldround); emu->sw.f.F87_C2 = 0; emu->sw.f.F87_C1 = 0; break; @@ -256,18 +288,22 @@ uintptr_t RunD9(x86emu_t *emu, uintptr_t addr) break; case 2: /* FST Ed, ST0 */ GET_ED; - if(!(((uintptr_t)ED)&3)) + if(!(((uintptr_t)ED)&3)) { + oldround = fpu_setround(emu); *(float*)ED = ST0.d; - else { + fesetround(oldround); + } else { f = ST0.d; memcpy(ED, &f, sizeof(float)); } break; case 3: /* FSTP Ed, ST0 */ GET_ED; - if(!(((uintptr_t)ED)&3)) + if(!(((uintptr_t)ED)&3)) { + oldround = fpu_setround(emu); *(float*)ED = ST0.d; - else { + fesetround(oldround); + } else { f = ST0.d; memcpy(ED, &f, sizeof(float)); } @@ -303,5 +339,5 @@ uintptr_t RunD9(x86emu_t *emu, uintptr_t addr) return 0; } } - return addr; + return addr; } diff --git a/src/emu/x86runda.c b/src/emu/x86runda.c index c797c96f..4904ea02 100755 --- a/src/emu/x86runda.c +++ b/src/emu/x86runda.c @@ -1,3 +1,4 @@ +#include <fenv.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> @@ -11,6 +12,7 @@ #include "x86emu_private.h" #include "x86run_private.h" #include "x87emu_private.h" +#include "x87emu_setround.h" #include "x86primop.h" #include "x86trace.h" #include "box86context.h" @@ -24,7 +26,6 @@ uintptr_t RunDA(x86emu_t *emu, uintptr_t addr) #endif { uint8_t nextop; - int32_t tmp32s; int64_t ll; float f; reg32_t *oped; @@ -101,7 +102,8 @@ uintptr_t RunDA(x86emu_t *emu, uintptr_t addr) case 0xF9: case 0xFD: return 0; - default: + default:; + int oldround = fpu_setround(emu); switch((nextop>>3)&7) { case 0: /* FIADD ST0, Ed int */ GET_ED; @@ -137,6 +139,7 @@
uintptr_t RunDA(x86emu_t *emu, uintptr_t addr) ST0.d = (double)ED->sdword[0] / ST0.d; break; } + fesetround(oldround); } return addr; } \ No newline at end of file diff --git a/src/emu/x86rundc.c b/src/emu/x86rundc.c index ed1fbfb9..2c672573 100755 --- a/src/emu/x86rundc.c +++ b/src/emu/x86rundc.c @@ -1,3 +1,4 @@ +#include <fenv.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> @@ -11,6 +12,7 @@ #include "x86emu_private.h" #include "x86run_private.h" #include "x87emu_private.h" +#include "x87emu_setround.h" #include "x86primop.h" #include "x86trace.h" #include "box86context.h" @@ -32,6 +34,7 @@ uintptr_t RunDC(x86emu_t *emu, uintptr_t addr) x86emu_t*emu = test->emu; #endif + int oldround = fpu_setround(emu); nextop = F8; switch(nextop) { case 0xC0: @@ -184,8 +187,10 @@ uintptr_t RunDC(x86emu_t *emu, uintptr_t addr) } break; default: + fesetround(oldround); return 0; } } + fesetround(oldround); return addr; } \ No newline at end of file diff --git a/src/emu/x86runde.c b/src/emu/x86runde.c index f44a3c69..26beca5f 100755 --- a/src/emu/x86runde.c +++ b/src/emu/x86runde.c @@ -11,6 +11,7 @@ #include "x86emu_private.h" #include "x86run_private.h" #include "x87emu_private.h" +#include "x87emu_setround.h" #include "x86primop.h" #include "x86trace.h" #include "box86context.h" @@ -32,6 +33,7 @@ uintptr_t RunDE(x86emu_t *emu, uintptr_t addr) x86emu_t*emu = test->emu; #endif + int oldround = fpu_setround(emu); nextop = F8; switch (nextop) { case 0xC0: /* FADDP STx, ST0 */ @@ -158,8 +160,10 @@ uintptr_t RunDE(x86emu_t *emu, uintptr_t addr) ST0.d = (double)EW->sword[0] / ST0.d; break; default: + fesetround(oldround); return 0; } } + fesetround(oldround); return addr; } \ No newline at end of file diff --git a/src/emu/x86rundf.c b/src/emu/x86rundf.c index 45ef62e0..d5a95e38 100755 --- a/src/emu/x86rundf.c +++ b/src/emu/x86rundf.c @@ -1,3 +1,4 @@ +#include <fenv.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> diff --git a/src/emu/x86test.c b/src/emu/x86test.c index ea6a9515..d6a6f1b0 100644 --- a/src/emu/x86test.c +++ b/src/emu/x86test.c @@ -75,7 +75,7 @@ void x86test_check(x86emu_t* ref, uintptr_t ip) for(int i=0; i<ref->fpu_stack; ++i) { if(ref->x87[(ref->top+i)&7].d != emu->x87[(emu->top+i)&7].d) { BANNER; - printf_log(LOG_NONE, "ST%d: %g | %g\n", i, ref->x87[(ref->top+i)&7].d, emu->x87[(emu->top+i)&7].d); + printf_log(LOG_NONE, "ST%d: %g (%a) | %g (%a)\n", i, ref->x87[(ref->top+i)&7].d, ref->x87[(ref->top+i)&7].d, emu->x87[(emu->top+i)&7].d, emu->x87[(emu->top+i)&7].d); } } } diff --git a/src/emu/x87emu_private.c b/src/emu/x87emu_private.c index 3c38f519..a983ccc2 100755 --- a/src/emu/x87emu_private.c +++ b/src/emu/x87emu_private.c @@ -1,3 +1,4 @@ +#include <fenv.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> diff --git a/src/emu/x87emu_setround.h b/src/emu/x87emu_setround.h new file mode 100644 index 00000000..bd2288db --- /dev/null +++ b/src/emu/x87emu_setround.h @@ -0,0 +1,29 @@ +#ifndef __SETROUND_H__ +#define __SETROUND_H__ +#pragma STDC FENV_ACCESS ON +#include <fenv.h> +#include <stdint.h> +#include "x86emu.h" +#include "x86emu_private.h" +// set the rounding mode to the emulator's one, and return the old one +static inline int fpu_setround(x86emu_t* emu) { + int ret = fegetround(); + int rounding_direction; + switch (emu->cw.f.C87_RD) { + case ROUND_Nearest: + rounding_direction = FE_TONEAREST; + break; + case ROUND_Down: + rounding_direction = FE_DOWNWARD; + break; + case ROUND_Up: + rounding_direction = FE_UPWARD; + break; + case ROUND_Chop: + rounding_direction = FE_TOWARDZERO; + break; + } + fesetround(rounding_direction); + return ret; +} #endif diff --git a/src/main.c b/src/main.c index
793324c2..19ec9b12 100644 --- a/src/main.c +++ b/src/main.c @@ -434,7 +434,7 @@ void LoadLogEnv() box86_dynarec_fastround = p[0]-'0'; } if(!box86_dynarec_fastround) - printf_log(LOG_INFO, "Dynarec will try to generate x86 precise IEEE->int rounding\n"); + printf_log(LOG_INFO, "Dynarec will try to generate x86 precise IEEE->int rounding and set rounding mode for computation\n"); } p = getenv("BOX86_DYNAREC_SAFEFLAGS"); if(p) { diff --git a/tests/ref26.txt b/tests/ref26.txt new file mode 100644 index 00000000..358877e1 --- /dev/null +++ b/tests/ref26.txt @@ -0,0 +1,810 @@ +Testing: s = (0x1.123456789abcp2) -> (double)s +FE_TONEAREST 0x1.123456789abcp+2 +FE_DOWNWARD 0x1.123456789abcp+2 +FE_UPWARD 0x1.123456789abcp+2 +FE_TOWARDZERO 0x1.123456789abcp+2 + +Testing: s = (0x1.123456789abcp2) -> (float)s +FE_TONEAREST 0x1.123456p+2 +FE_DOWNWARD 0x1.123456p+2 +FE_UPWARD 0x1.123458p+2 +FE_TOWARDZERO 0x1.123456p+2 + +Testing: s = (-(0x1.123456789abcp2)) -> (double)s +FE_TONEAREST -0x1.123456789abcp+2 +FE_DOWNWARD -0x1.123456789abcp+2 +FE_UPWARD -0x1.123456789abcp+2 +FE_TOWARDZERO -0x1.123456789abcp+2 + +Testing: s = (-(0x1.123456789abcp2)) -> (float)s +FE_TONEAREST -0x1.123456p+2 +FE_DOWNWARD -0x1.123458p+2 +FE_UPWARD -0x1.123456p+2 +FE_TOWARDZERO -0x1.123456p+2 + +Testing: d = (0x1.123456789abcp512) -> (float)d +FE_TONEAREST inf +FE_DOWNWARD 0x1.fffffep+127 +FE_UPWARD inf +FE_TOWARDZERO 0x1.fffffep+127 + +Testing: s = (0x1.123456789abcp29) -> (double)s +FE_TONEAREST 0x1.123456789abcp+29 +FE_DOWNWARD 0x1.123456789abcp+29 +FE_UPWARD 0x1.123456789abcp+29 +FE_TOWARDZERO 0x1.123456789abcp+29 + +Testing: s = (0x1.123456789abcp29) -> (float)s +FE_TONEAREST 0x1.123456p+29 +FE_DOWNWARD 0x1.123456p+29 +FE_UPWARD 0x1.123458p+29 +FE_TOWARDZERO 0x1.123456p+29 + +Testing: s = (0x1.123456789abcp29) -> (int16_t)s +FE_TONEAREST -32768 +FE_DOWNWARD -32768 +FE_UPWARD -32768 +FE_TOWARDZERO -32768 + +Testing: s = (0x1.123456789abcp29) -> (int8_t)s +FE_TONEAREST 0 +FE_DOWNWARD 0 +FE_UPWARD 0 +FE_TOWARDZERO 0 + +Testing: s = (0x1.123456789abcp29) -> (unsigned short)s +FE_TONEAREST 35535 +FE_DOWNWARD 35535 +FE_UPWARD 35535 +FE_TOWARDZERO 35535 + +Testing: s = (0x1.123456789abcp29) -> (unsigned char)s +FE_TONEAREST 0 +FE_DOWNWARD 0 +FE_UPWARD 0 +FE_TOWARDZERO 0 + +Testing: s = (-(0x1.123456789abcp29)) -> (double)s +FE_TONEAREST -0x1.123456789abcp+29 +FE_DOWNWARD -0x1.123456789abcp+29 +FE_UPWARD -0x1.123456789abcp+29 +FE_TOWARDZERO -0x1.123456789abcp+29 + +Testing: s = (-(0x1.123456789abcp29)) -> (float)s +FE_TONEAREST -0x1.123456p+29 +FE_DOWNWARD -0x1.123458p+29 +FE_UPWARD -0x1.123456p+29 +FE_TOWARDZERO -0x1.123456p+29 + +Testing: d = (-0x1.123456789abcp30) -> (int32_t)d +FE_TONEAREST -1150096798 +FE_DOWNWARD -1150096798 +FE_UPWARD -1150096798 +FE_TOWARDZERO -1150096798 + +Testing: d = (-0x1.123456789abcp62) -> (int64_t)d +FE_TONEAREST -4939628135293321216 +FE_DOWNWARD -4939628135293321216 +FE_UPWARD -4939628135293321216 +FE_TOWARDZERO -4939628135293321216 + +Testing: s = (0x1.123456789abcp2f) -> (double)s +FE_TONEAREST 0x1.123456p+2 +FE_DOWNWARD 0x1.123456p+2 +FE_UPWARD 0x1.123458p+2 +FE_TOWARDZERO 0x1.123456p+2 + +Testing: s = (0x1.123456789abcp2f) -> (float)s +FE_TONEAREST 0x1.123456p+2 +FE_DOWNWARD 0x1.123456p+2 +FE_UPWARD 0x1.123458p+2 +FE_TOWARDZERO 0x1.123456p+2 + +Testing: s = (-(0x1.123456789abcp2f)) -> (double)s +FE_TONEAREST -0x1.123456p+2 +FE_DOWNWARD -0x1.123458p+2 +FE_UPWARD -0x1.123456p+2 +FE_TOWARDZERO -0x1.123456p+2 + +Testing: s = (-(0x1.123456789abcp2f)) -> (float)s +FE_TONEAREST -0x1.123456p+2 
+FE_DOWNWARD -0x1.123458p+2 +FE_UPWARD -0x1.123456p+2 +FE_TOWARDZERO -0x1.123456p+2 + +Testing: s = (0x1.123456789abcp29f) -> (double)s +FE_TONEAREST 0x1.123456p+29 +FE_DOWNWARD 0x1.123456p+29 +FE_UPWARD 0x1.123458p+29 +FE_TOWARDZERO 0x1.123456p+29 + +Testing: s = (0x1.123456789abcp29f) -> (float)s +FE_TONEAREST 0x1.123456p+29 +FE_DOWNWARD 0x1.123456p+29 +FE_UPWARD 0x1.123458p+29 +FE_TOWARDZERO 0x1.123456p+29 + +Testing: s = (0x1.123456789abcp29f) -> (int16_t)s +FE_TONEAREST -32768 +FE_DOWNWARD -32768 +FE_UPWARD -32768 +FE_TOWARDZERO -32768 + +Testing: s = (0x1.123456789abcp29f) -> (int8_t)s +FE_TONEAREST 0 +FE_DOWNWARD 0 +FE_UPWARD 0 +FE_TOWARDZERO 0 + +Testing: s = (0x1.123456789abcp29f) -> (unsigned short)s +FE_TONEAREST 35520 +FE_DOWNWARD 35520 +FE_UPWARD 35584 +FE_TOWARDZERO 35520 + +Testing: s = (0x1.123456789abcp29f) -> (unsigned char)s +FE_TONEAREST 0 +FE_DOWNWARD 0 +FE_UPWARD 0 +FE_TOWARDZERO 0 + +Testing: s = (-(0x1.123456789abcp29f)) -> (double)s +FE_TONEAREST -0x1.123456p+29 +FE_DOWNWARD -0x1.123458p+29 +FE_UPWARD -0x1.123456p+29 +FE_TOWARDZERO -0x1.123456p+29 + +Testing: s = (-(0x1.123456789abcp29f)) -> (float)s +FE_TONEAREST -0x1.123456p+29 +FE_DOWNWARD -0x1.123458p+29 +FE_UPWARD -0x1.123456p+29 +FE_TOWARDZERO -0x1.123456p+29 + +Testing: f = -0x1.123456789abcp30f -> (int32_t)f +FE_TONEAREST -1150096768 +FE_DOWNWARD -1150096896 +FE_UPWARD -1150096768 +FE_TOWARDZERO -1150096768 + +Testing: d = -0x1.1234567p0 -> (double)((int)d) +FE_TONEAREST -0x1p+0 +FE_DOWNWARD -0x1p+0 +FE_UPWARD -0x1p+0 +FE_TOWARDZERO -0x1p+0 + +Testing: d = 0x1.9234567p0 -> (double)((int)d) +FE_TONEAREST 0x1p+0 +FE_DOWNWARD 0x1p+0 +FE_UPWARD 0x1p+0 +FE_TOWARDZERO 0x1p+0 + +Testing: d = -0x1.9234567p0 -> (double)((int)d) +FE_TONEAREST -0x1p+0 +FE_DOWNWARD -0x1p+0 +FE_UPWARD -0x1p+0 +FE_TOWARDZERO -0x1p+0 + +Testing: d = 0x1.1234567p0 -> (double)((long int)d) +FE_TONEAREST 0x1p+0 +FE_DOWNWARD 0x1p+0 +FE_UPWARD 0x1p+0 +FE_TOWARDZERO 0x1p+0 + +Testing: d = -0x1.1234567p0 -> (double)((long int)d) +FE_TONEAREST -0x1p+0 +FE_DOWNWARD -0x1p+0 +FE_UPWARD -0x1p+0 +FE_TOWARDZERO -0x1p+0 + +Testing: d = 0x1.9234567p0 -> (double)((long int)d) +FE_TONEAREST 0x1p+0 +FE_DOWNWARD 0x1p+0 +FE_UPWARD 0x1p+0 +FE_TOWARDZERO 0x1p+0 + +Testing: d = -0x1.9234567p0 -> (double)((long int)d) +FE_TONEAREST -0x1p+0 +FE_DOWNWARD -0x1p+0 +FE_UPWARD -0x1p+0 +FE_TOWARDZERO -0x1p+0 + +Testing: (d1 = (1.0), d2 = (0x1.0000000000001p0)) -> d1 + d2 +FE_TONEAREST 0x1p+1 +FE_DOWNWARD 0x1p+1 +FE_UPWARD 0x1.0000000000001p+1 +FE_TOWARDZERO 0x1p+1 + +Testing: (d1 = -(1.0), d2 = (0x1.0000000000001p0)) -> d1 + d2 +FE_TONEAREST 0x1p-52 +FE_DOWNWARD 0x1p-52 +FE_UPWARD 0x1p-52 +FE_TOWARDZERO 0x1p-52 + +Testing: (d1 = (1.0), d2 = -(0x1.0000000000001p0)) -> d1 + d2 +FE_TONEAREST -0x1p-52 +FE_DOWNWARD -0x1p-52 +FE_UPWARD -0x1p-52 +FE_TOWARDZERO -0x1p-52 + +Testing: (d1 = -(1.0), d2 = -(0x1.0000000000001p0)) -> d1 + d2 +FE_TONEAREST -0x1p+1 +FE_DOWNWARD -0x1.0000000000001p+1 +FE_UPWARD -0x1p+1 +FE_TOWARDZERO -0x1p+1 + +Testing: (d1 = (1.0), d2 = (0x1.0000000000001p0)) -> d1 - d2 +FE_TONEAREST -0x1p-52 +FE_DOWNWARD -0x1p-52 +FE_UPWARD -0x1p-52 +FE_TOWARDZERO -0x1p-52 + +Testing: (d1 = -(1.0), d2 = (0x1.0000000000001p0)) -> d1 - d2 +FE_TONEAREST -0x1p+1 +FE_DOWNWARD -0x1.0000000000001p+1 +FE_UPWARD -0x1p+1 +FE_TOWARDZERO -0x1p+1 + +Testing: (d1 = (1.0), d2 = -(0x1.0000000000001p0)) -> d1 - d2 +FE_TONEAREST 0x1p+1 +FE_DOWNWARD 0x1p+1 +FE_UPWARD 0x1.0000000000001p+1 +FE_TOWARDZERO 0x1p+1 + +Testing: (d1 = -(1.0), d2 = -(0x1.0000000000001p0)) -> d1 - d2 +FE_TONEAREST 
0x1p-52 +FE_DOWNWARD 0x1p-52 +FE_UPWARD 0x1p-52 +FE_TOWARDZERO 0x1p-52 + +Testing: (d1 = (1.0), d2 = (0x1.0000000000001p0)) -> d2 - d1 +FE_TONEAREST 0x1p-52 +FE_DOWNWARD 0x1p-52 +FE_UPWARD 0x1p-52 +FE_TOWARDZERO 0x1p-52 + +Testing: (d1 = -(1.0), d2 = (0x1.0000000000001p0)) -> d2 - d1 +FE_TONEAREST 0x1p+1 +FE_DOWNWARD 0x1p+1 +FE_UPWARD 0x1.0000000000001p+1 +FE_TOWARDZERO 0x1p+1 + +Testing: (d1 = (1.0), d2 = -(0x1.0000000000001p0)) -> d2 - d1 +FE_TONEAREST -0x1p+1 +FE_DOWNWARD -0x1.0000000000001p+1 +FE_UPWARD -0x1p+1 +FE_TOWARDZERO -0x1p+1 + +Testing: (d1 = -(1.0), d2 = -(0x1.0000000000001p0)) -> d2 - d1 +FE_TONEAREST -0x1p-52 +FE_DOWNWARD -0x1p-52 +FE_UPWARD -0x1p-52 +FE_TOWARDZERO -0x1p-52 + +Testing: (d1 = (1.0), d2 = (0x1.000000000000dp-4)) -> d1 + d2 +FE_TONEAREST 0x1.1000000000001p+0 +FE_DOWNWARD 0x1.1p+0 +FE_UPWARD 0x1.1000000000001p+0 +FE_TOWARDZERO 0x1.1p+0 + +Testing: (d1 = -(1.0), d2 = (0x1.000000000000dp-4)) -> d1 + d2 +FE_TONEAREST -0x1.dfffffffffffep-1 +FE_DOWNWARD -0x1.dffffffffffffp-1 +FE_UPWARD -0x1.dfffffffffffep-1 +FE_TOWARDZERO -0x1.dfffffffffffep-1 + +Testing: (d1 = (1.0), d2 = -(0x1.000000000000dp-4)) -> d1 + d2 +FE_TONEAREST 0x1.dfffffffffffep-1 +FE_DOWNWARD 0x1.dfffffffffffep-1 +FE_UPWARD 0x1.dffffffffffffp-1 +FE_TOWARDZERO 0x1.dfffffffffffep-1 + +Testing: (d1 = -(1.0), d2 = -(0x1.000000000000dp-4)) -> d1 + d2 +FE_TONEAREST -0x1.1000000000001p+0 +FE_DOWNWARD -0x1.1000000000001p+0 +FE_UPWARD -0x1.1p+0 +FE_TOWARDZERO -0x1.1p+0 + +Testing: (d1 = (1.0), d2 = (0x1.000000000000dp-4)) -> d1 - d2 +FE_TONEAREST 0x1.dfffffffffffep-1 +FE_DOWNWARD 0x1.dfffffffffffep-1 +FE_UPWARD 0x1.dffffffffffffp-1 +FE_TOWARDZERO 0x1.dfffffffffffep-1 + +Testing: (d1 = -(1.0), d2 = (0x1.000000000000dp-4)) -> d1 - d2 +FE_TONEAREST -0x1.1000000000001p+0 +FE_DOWNWARD -0x1.1000000000001p+0 +FE_UPWARD -0x1.1p+0 +FE_TOWARDZERO -0x1.1p+0 + +Testing: (d1 = (1.0), d2 = -(0x1.000000000000dp-4)) -> d1 - d2 +FE_TONEAREST 0x1.1000000000001p+0 +FE_DOWNWARD 0x1.1p+0 +FE_UPWARD 0x1.1000000000001p+0 +FE_TOWARDZERO 0x1.1p+0 + +Testing: (d1 = -(1.0), d2 = -(0x1.000000000000dp-4)) -> d1 - d2 +FE_TONEAREST -0x1.dfffffffffffep-1 +FE_DOWNWARD -0x1.dffffffffffffp-1 +FE_UPWARD -0x1.dfffffffffffep-1 +FE_TOWARDZERO -0x1.dfffffffffffep-1 + +Testing: (d1 = (1.0), d2 = (0x1.000000000000dp-4)) -> d2 - d1 +FE_TONEAREST -0x1.dfffffffffffep-1 +FE_DOWNWARD -0x1.dffffffffffffp-1 +FE_UPWARD -0x1.dfffffffffffep-1 +FE_TOWARDZERO -0x1.dfffffffffffep-1 + +Testing: (d1 = -(1.0), d2 = (0x1.000000000000dp-4)) -> d2 - d1 +FE_TONEAREST 0x1.1000000000001p+0 +FE_DOWNWARD 0x1.1p+0 +FE_UPWARD 0x1.1000000000001p+0 +FE_TOWARDZERO 0x1.1p+0 + +Testing: (d1 = (1.0), d2 = -(0x1.000000000000dp-4)) -> d2 - d1 +FE_TONEAREST -0x1.1000000000001p+0 +FE_DOWNWARD -0x1.1000000000001p+0 +FE_UPWARD -0x1.1p+0 +FE_TOWARDZERO -0x1.1p+0 + +Testing: (d1 = -(1.0), d2 = -(0x1.000000000000dp-4)) -> d2 - d1 +FE_TONEAREST 0x1.dfffffffffffep-1 +FE_DOWNWARD 0x1.dfffffffffffep-1 +FE_UPWARD 0x1.dffffffffffffp-1 +FE_TOWARDZERO 0x1.dfffffffffffep-1 + +Testing: (d1 = (0x1.233445566778p0), d2 = (0x1.3456789abcdep0)) -> d1 + d2 +FE_TONEAREST 0x1.2bc55ef8922bp+1 +FE_DOWNWARD 0x1.2bc55ef8922bp+1 +FE_UPWARD 0x1.2bc55ef8922bp+1 +FE_TOWARDZERO 0x1.2bc55ef8922bp+1 + +Testing: (d1 = -(0x1.233445566778p0), d2 = (0x1.3456789abcdep0)) -> d1 + d2 +FE_TONEAREST 0x1.12233445566p-4 +FE_DOWNWARD 0x1.12233445566p-4 +FE_UPWARD 0x1.12233445566p-4 +FE_TOWARDZERO 0x1.12233445566p-4 + +Testing: (d1 = (0x1.233445566778p0), d2 = -(0x1.3456789abcdep0)) -> d1 + d2 +FE_TONEAREST -0x1.12233445566p-4 
+FE_DOWNWARD -0x1.12233445566p-4
+FE_UPWARD -0x1.12233445566p-4
+FE_TOWARDZERO -0x1.12233445566p-4
+
+Testing: (d1 = -(0x1.233445566778p0), d2 = -(0x1.3456789abcdep0)) -> d1 + d2
+FE_TONEAREST -0x1.2bc55ef8922bp+1
+FE_DOWNWARD -0x1.2bc55ef8922bp+1
+FE_UPWARD -0x1.2bc55ef8922bp+1
+FE_TOWARDZERO -0x1.2bc55ef8922bp+1
+
+Testing: (d1 = (0x1.233445566778p0f), d2 = (0x1.3456789abcdep0f)) -> d1 *d2
+FE_TONEAREST 0x1.5ebd404804dp+0
+FE_DOWNWARD 0x1.5ebd3ddf57ep+0
+FE_UPWARD 0x1.5ebd428e6d5cp+0
+FE_TOWARDZERO 0x1.5ebd3ddf57ep+0
+
+Testing: (d1 = -(0x1.233445566778p0f), d2 = (0x1.3456789abcdep0f)) -> d1 *d2
+FE_TONEAREST -0x1.5ebd404804dp+0
+FE_DOWNWARD -0x1.5ebd404804dp+0
+FE_UPWARD -0x1.5ebd4025c068p+0
+FE_TOWARDZERO -0x1.5ebd3ddf57ep+0
+
+Testing: (d1 = (0x1.233445566778p0f), d2 = -(0x1.3456789abcdep0f)) -> d1 *d2
+FE_TONEAREST -0x1.5ebd404804dp+0
+FE_DOWNWARD -0x1.5ebd4025c068p+0
+FE_UPWARD -0x1.5ebd404804dp+0
+FE_TOWARDZERO -0x1.5ebd3ddf57ep+0
+
+Testing: (d1 = -(0x1.233445566778p0f), d2 = -(0x1.3456789abcdep0f)) -> d1 *d2
+FE_TONEAREST 0x1.5ebd404804dp+0
+FE_DOWNWARD 0x1.5ebd428e6d5cp+0
+FE_UPWARD 0x1.5ebd3ddf57ep+0
+FE_TOWARDZERO 0x1.5ebd3ddf57ep+0
+
+Testing: (d1 = (0x1.233445566778p0f), d2 = (0x1.3456789abcdep0)) -> d1 *d2
+FE_TONEAREST 0x1.5ebd40f80919p+0
+FE_DOWNWARD 0x1.5ebd3e8f5c27dp+0
+FE_UPWARD 0x1.5ebd40f809191p+0
+FE_TOWARDZERO 0x1.5ebd3e8f5c27dp+0
+
+Testing: (d1 = -(0x1.233445566778p0f), d2 = (0x1.3456789abcdep0)) -> d1 *d2
+FE_TONEAREST -0x1.5ebd40f80919p+0
+FE_DOWNWARD -0x1.5ebd40f809191p+0
+FE_UPWARD -0x1.5ebd3e8f5c27dp+0
+FE_TOWARDZERO -0x1.5ebd3e8f5c27dp+0
+
+Testing: (d1 = (0x1.233445566778p0f), d2 = -(0x1.3456789abcdep0)) -> d1 *d2
+FE_TONEAREST -0x1.5ebd40f80919p+0
+FE_DOWNWARD -0x1.5ebd3e8f5c27ep+0
+FE_UPWARD -0x1.5ebd40f80919p+0
+FE_TOWARDZERO -0x1.5ebd3e8f5c27dp+0
+
+Testing: (d1 = -(0x1.233445566778p0f), d2 = -(0x1.3456789abcdep0)) -> d1 *d2
+FE_TONEAREST 0x1.5ebd40f80919p+0
+FE_DOWNWARD 0x1.5ebd40f80919p+0
+FE_UPWARD 0x1.5ebd3e8f5c27ep+0
+FE_TOWARDZERO 0x1.5ebd3e8f5c27dp+0
+
+Testing: (d1 = (0x1.233445566778p0), d2 = (0x1.3456789abcdep0)) -> d1 *d2
+FE_TONEAREST 0x1.5ebd402bc44c4p+0
+FE_DOWNWARD 0x1.5ebd402bc44c4p+0
+FE_UPWARD 0x1.5ebd402bc44c5p+0
+FE_TOWARDZERO 0x1.5ebd402bc44c4p+0
+
+Testing: (d1 = -(0x1.233445566778p0), d2 = (0x1.3456789abcdep0)) -> d1 *d2
+FE_TONEAREST -0x1.5ebd402bc44c4p+0
+FE_DOWNWARD -0x1.5ebd402bc44c5p+0
+FE_UPWARD -0x1.5ebd402bc44c4p+0
+FE_TOWARDZERO -0x1.5ebd402bc44c4p+0
+
+Testing: (d1 = (0x1.233445566778p0), d2 = -(0x1.3456789abcdep0)) -> d1 *d2
+FE_TONEAREST -0x1.5ebd402bc44c4p+0
+FE_DOWNWARD -0x1.5ebd402bc44c5p+0
+FE_UPWARD -0x1.5ebd402bc44c4p+0
+FE_TOWARDZERO -0x1.5ebd402bc44c4p+0
+
+Testing: (d1 = -(0x1.233445566778p0), d2 = -(0x1.3456789abcdep0)) -> d1 *d2
+FE_TONEAREST 0x1.5ebd402bc44c4p+0
+FE_DOWNWARD 0x1.5ebd402bc44c4p+0
+FE_UPWARD 0x1.5ebd402bc44c5p+0
+FE_TOWARDZERO 0x1.5ebd402bc44c4p+0
+
+Testing: (d1 = (0x1.233445566778p0f), d2 = (0x1.3456789abcdep0)) -> d1 *d2
+FE_TONEAREST 0x1.5ebd40f80919p+0
+FE_DOWNWARD 0x1.5ebd3e8f5c27dp+0
+FE_UPWARD 0x1.5ebd40f809191p+0
+FE_TOWARDZERO 0x1.5ebd3e8f5c27dp+0
+
+Testing: (d1 = -(0x1.233445566778p0f), d2 = (0x1.3456789abcdep0)) -> d1 *d2
+FE_TONEAREST -0x1.5ebd40f80919p+0
+FE_DOWNWARD -0x1.5ebd40f809191p+0
+FE_UPWARD -0x1.5ebd3e8f5c27dp+0
+FE_TOWARDZERO -0x1.5ebd3e8f5c27dp+0
+
+Testing: (d1 = (0x1.233445566778p0f), d2 = -(0x1.3456789abcdep0)) -> d1 *d2
+FE_TONEAREST -0x1.5ebd40f80919p+0
+FE_DOWNWARD -0x1.5ebd3e8f5c27ep+0
+FE_UPWARD -0x1.5ebd40f80919p+0
+FE_TOWARDZERO -0x1.5ebd3e8f5c27dp+0
+
+Testing: (d1 = -(0x1.233445566778p0f), d2 = -(0x1.3456789abcdep0)) -> d1 *d2
+FE_TONEAREST 0x1.5ebd40f80919p+0
+FE_DOWNWARD 0x1.5ebd40f80919p+0
+FE_UPWARD 0x1.5ebd3e8f5c27ep+0
+FE_TOWARDZERO 0x1.5ebd3e8f5c27dp+0
+
+Testing: (d1 = (0x1.233445566778p0), d2 = (0x1.3456789abcdep0)) -> d1 *d2
+FE_TONEAREST 0x1.5ebd3f7bc003ap+0
+FE_DOWNWARD 0x1.5ebd3f7bc003ap+0
+FE_UPWARD 0x1.5ebd41c2288e5p+0
+FE_TOWARDZERO 0x1.5ebd3f7bc003ap+0
+
+Testing: (d1 = -(0x1.233445566778p0), d2 = (0x1.3456789abcdep0)) -> d1 *d2
+FE_TONEAREST -0x1.5ebd3f7bc003ap+0
+FE_DOWNWARD -0x1.5ebd3f7bc003bp+0
+FE_UPWARD -0x1.5ebd41c2288e4p+0
+FE_TOWARDZERO -0x1.5ebd3f7bc003ap+0
+
+Testing: (d1 = (0x1.233445566778p0), d2 = -(0x1.3456789abcdep0)) -> d1 *d2
+FE_TONEAREST -0x1.5ebd3f7bc003ap+0
+FE_DOWNWARD -0x1.5ebd41c2288e5p+0
+FE_UPWARD -0x1.5ebd3f7bc003ap+0
+FE_TOWARDZERO -0x1.5ebd3f7bc003ap+0
+
+Testing: (d1 = -(0x1.233445566778p0), d2 = -(0x1.3456789abcdep0)) -> d1 *d2
+FE_TONEAREST 0x1.5ebd3f7bc003ap+0
+FE_DOWNWARD 0x1.5ebd41c2288e4p+0
+FE_UPWARD 0x1.5ebd3f7bc003bp+0
+FE_TOWARDZERO 0x1.5ebd3f7bc003ap+0
+
+Testing: (d1 = (0x1.233445566778p0f), d2 = (0x1.3456789abcdep0f)) -> d1 *d2
+FE_TONEAREST 0x1.5ebd404804dp+0
+FE_DOWNWARD 0x1.5ebd3ddf57ep+0
+FE_UPWARD 0x1.5ebd428e6d5cp+0
+FE_TOWARDZERO 0x1.5ebd3ddf57ep+0
+
+Testing: (d1 = -(0x1.233445566778p0f), d2 = (0x1.3456789abcdep0f)) -> d1 *d2
+FE_TONEAREST -0x1.5ebd404804dp+0
+FE_DOWNWARD -0x1.5ebd404804dp+0
+FE_UPWARD -0x1.5ebd4025c068p+0
+FE_TOWARDZERO -0x1.5ebd3ddf57ep+0
+
+Testing: (d1 = (0x1.233445566778p0f), d2 = -(0x1.3456789abcdep0f)) -> d1 *d2
+FE_TONEAREST -0x1.5ebd404804dp+0
+FE_DOWNWARD -0x1.5ebd4025c068p+0
+FE_UPWARD -0x1.5ebd404804dp+0
+FE_TOWARDZERO -0x1.5ebd3ddf57ep+0
+
+Testing: (d1 = -(0x1.233445566778p0f), d2 = -(0x1.3456789abcdep0f)) -> d1 *d2
+FE_TONEAREST 0x1.5ebd404804dp+0
+FE_DOWNWARD 0x1.5ebd428e6d5cp+0
+FE_UPWARD 0x1.5ebd3ddf57ep+0
+FE_TOWARDZERO 0x1.5ebd3ddf57ep+0
+
+Testing: (d1 = (0x1.233445566778p0), d2 = (5)) -> d1 *d2
+FE_TONEAREST 0x1.6c0156ac0156p+2
+FE_DOWNWARD 0x1.6c0156ac0156p+2
+FE_UPWARD 0x1.6c0156ac0156p+2
+FE_TOWARDZERO 0x1.6c0156ac0156p+2
+
+Testing: (d1 = -(0x1.233445566778p0), d2 = (5)) -> d1 *d2
+FE_TONEAREST -0x1.6c0156ac0156p+2
+FE_DOWNWARD -0x1.6c0156ac0156p+2
+FE_UPWARD -0x1.6c0156ac0156p+2
+FE_TOWARDZERO -0x1.6c0156ac0156p+2
+
+Testing: (d1 = (0x1.233445566778p0), d2 = -(5)) -> d1 *d2
+FE_TONEAREST -0x1.6c0156ac0156p+2
+FE_DOWNWARD -0x1.6c0156ac0156p+2
+FE_UPWARD -0x1.6c0156ac0156p+2
+FE_TOWARDZERO -0x1.6c0156ac0156p+2
+
+Testing: (d1 = -(0x1.233445566778p0), d2 = -(5)) -> d1 *d2
+FE_TONEAREST 0x1.6c0156ac0156p+2
+FE_DOWNWARD 0x1.6c0156ac0156p+2
+FE_UPWARD 0x1.6c0156ac0156p+2
+FE_TOWARDZERO 0x1.6c0156ac0156p+2
+
+Testing: (d1 = (15), d2 = (0x1.3456789abcdep0f)) -> d1 *d2
+FE_TONEAREST 0x1.2111111111102p+4
+FE_DOWNWARD 0x1.2111111111102p+4
+FE_UPWARD 0x1.2111111111102p+4
+FE_TOWARDZERO 0x1.2111111111102p+4
+
+Testing: (d1 = -(15), d2 = (0x1.3456789abcdep0f)) -> d1 *d2
+FE_TONEAREST -0x1.2111111111102p+4
+FE_DOWNWARD -0x1.2111111111102p+4
+FE_UPWARD -0x1.2111111111102p+4
+FE_TOWARDZERO -0x1.2111111111102p+4
+
+Testing: (d1 = (15), d2 = -(0x1.3456789abcdep0f)) -> d1 *d2
+FE_TONEAREST -0x1.2111111111102p+4
+FE_DOWNWARD -0x1.2111111111102p+4
+FE_UPWARD -0x1.2111111111102p+4
+FE_TOWARDZERO -0x1.2111111111102p+4
+
+Testing: (d1 = -(15), d2 = -(0x1.3456789abcdep0f)) -> d1 *d2
+FE_TONEAREST 0x1.2111111111102p+4
+FE_DOWNWARD 0x1.2111111111102p+4
+FE_UPWARD 0x1.2111111111102p+4
+FE_TOWARDZERO 0x1.2111111111102p+4
+
+Testing: (d1 = (0x1.233445566778p0f), d2 = (15)) -> d1 *d2
+FE_TONEAREST 0x1.110101ap+4
+FE_DOWNWARD 0x1.1100ffcp+4
+FE_UPWARD 0x1.110101ap+4
+FE_TOWARDZERO 0x1.1100ffcp+4
+
+Testing: (d1 = -(0x1.233445566778p0f), d2 = (15)) -> d1 *d2
+FE_TONEAREST -0x1.110101ap+4
+FE_DOWNWARD -0x1.110101ap+4
+FE_UPWARD -0x1.1100ffcp+4
+FE_TOWARDZERO -0x1.1100ffcp+4
+
+Testing: (d1 = (0x1.233445566778p0f), d2 = -(15)) -> d1 *d2
+FE_TONEAREST -0x1.110101ap+4
+FE_DOWNWARD -0x1.1100ffcp+4
+FE_UPWARD -0x1.110101ap+4
+FE_TOWARDZERO -0x1.1100ffcp+4
+
+Testing: (d1 = -(0x1.233445566778p0f), d2 = -(15)) -> d1 *d2
+FE_TONEAREST 0x1.110101ap+4
+FE_DOWNWARD 0x1.110101ap+4
+FE_UPWARD 0x1.1100ffcp+4
+FE_TOWARDZERO 0x1.1100ffcp+4
+
+Testing: (d1 = (15), d2 = (0x1.3456789abcdep0f)) -> d1 *d2
+FE_TONEAREST 0x1.2111108p+4
+FE_DOWNWARD 0x1.2111108p+4
+FE_UPWARD 0x1.2111126p+4
+FE_TOWARDZERO 0x1.2111108p+4
+
+Testing: (d1 = -(15), d2 = (0x1.3456789abcdep0f)) -> d1 *d2
+FE_TONEAREST -0x1.2111108p+4
+FE_DOWNWARD -0x1.2111108p+4
+FE_UPWARD -0x1.2111126p+4
+FE_TOWARDZERO -0x1.2111108p+4
+
+Testing: (d1 = (15), d2 = -(0x1.3456789abcdep0f)) -> d1 *d2
+FE_TONEAREST -0x1.2111108p+4
+FE_DOWNWARD -0x1.2111126p+4
+FE_UPWARD -0x1.2111108p+4
+FE_TOWARDZERO -0x1.2111108p+4
+
+Testing: (d1 = -(15), d2 = -(0x1.3456789abcdep0f)) -> d1 *d2
+FE_TONEAREST 0x1.2111108p+4
+FE_DOWNWARD 0x1.2111126p+4
+FE_UPWARD 0x1.2111108p+4
+FE_TOWARDZERO 0x1.2111108p+4
+
+Testing: (d1 = (0x1.233445566778p0), d2 = (0x1.3456789abcdep0)) -> d1 / d2
+FE_TONEAREST 0x1.e38ca44203ab9p-1
+FE_DOWNWARD 0x1.e38ca44203ab8p-1
+FE_UPWARD 0x1.e38ca44203ab9p-1
+FE_TOWARDZERO 0x1.e38ca44203ab8p-1
+
+Testing: (d1 = -(0x1.233445566778p0), d2 = (0x1.3456789abcdep0)) -> d1 / d2
+FE_TONEAREST -0x1.e38ca44203ab9p-1
+FE_DOWNWARD -0x1.e38ca44203ab9p-1
+FE_UPWARD -0x1.e38ca44203ab8p-1
+FE_TOWARDZERO -0x1.e38ca44203ab8p-1
+
+Testing: (d1 = (0x1.233445566778p0), d2 = -(0x1.3456789abcdep0)) -> d1 / d2
+FE_TONEAREST -0x1.e38ca44203ab9p-1
+FE_DOWNWARD -0x1.e38ca44203ab9p-1
+FE_UPWARD -0x1.e38ca44203ab8p-1
+FE_TOWARDZERO -0x1.e38ca44203ab8p-1
+
+Testing: (d1 = -(0x1.233445566778p0), d2 = -(0x1.3456789abcdep0)) -> d1 / d2
+FE_TONEAREST 0x1.e38ca44203ab9p-1
+FE_DOWNWARD 0x1.e38ca44203ab8p-1
+FE_UPWARD 0x1.e38ca44203ab9p-1
+FE_TOWARDZERO 0x1.e38ca44203ab8p-1
+
+Testing: (d1 = (0x1.233445566778p0), d2 = (0x1.3456789abcdep0)) -> d1 / d2
+FE_TONEAREST 0x1.e38ca44203ab9p-1
+FE_DOWNWARD 0x1.e38ca44203ab8p-1
+FE_UPWARD 0x1.e38ca44203ab9p-1
+FE_TOWARDZERO 0x1.e38ca44203ab8p-1
+
+Testing: (d1 = -(0x1.233445566778p0), d2 = (0x1.3456789abcdep0)) -> d1 / d2
+FE_TONEAREST -0x1.e38ca44203ab9p-1
+FE_DOWNWARD -0x1.e38ca44203ab9p-1
+FE_UPWARD -0x1.e38ca44203ab8p-1
+FE_TOWARDZERO -0x1.e38ca44203ab8p-1
+
+Testing: (d1 = (0x1.233445566778p0), d2 = -(0x1.3456789abcdep0)) -> d1 / d2
+FE_TONEAREST -0x1.e38ca44203ab9p-1
+FE_DOWNWARD -0x1.e38ca44203ab9p-1
+FE_UPWARD -0x1.e38ca44203ab8p-1
+FE_TOWARDZERO -0x1.e38ca44203ab8p-1
+
+Testing: (d1 = -(0x1.233445566778p0), d2 = -(0x1.3456789abcdep0)) -> d1 / d2
+FE_TONEAREST 0x1.e38ca44203ab9p-1
+FE_DOWNWARD 0x1.e38ca44203ab8p-1
+FE_UPWARD 0x1.e38ca44203ab9p-1
+FE_TOWARDZERO 0x1.e38ca44203ab8p-1
+
+Testing: (d1 = (0x1.233445566778p0), d2 = (0x1.3456789abcdep0f)) -> d1 / d2
+FE_TONEAREST 0x1.e38ca534ae61p-1
+FE_DOWNWARD 0x1.e38ca534ae61p-1
+FE_UPWARD 0x1.e38ca211bd4adp-1
+FE_TOWARDZERO 0x1.e38ca534ae61p-1
+
+Testing: (d1 = -(0x1.233445566778p0), d2 = (0x1.3456789abcdep0f)) -> d1 / d2
+FE_TONEAREST -0x1.e38ca534ae61p-1
+FE_DOWNWARD -0x1.e38ca534ae611p-1
+FE_UPWARD -0x1.e38ca211bd4acp-1
+FE_TOWARDZERO -0x1.e38ca534ae61p-1
+
+Testing: (d1 = (0x1.233445566778p0), d2 = -(0x1.3456789abcdep0f)) -> d1 / d2
+FE_TONEAREST -0x1.e38ca534ae61p-1
+FE_DOWNWARD -0x1.e38ca211bd4adp-1
+FE_UPWARD -0x1.e38ca534ae61p-1
+FE_TOWARDZERO -0x1.e38ca534ae61p-1
+
+Testing: (d1 = -(0x1.233445566778p0), d2 = -(0x1.3456789abcdep0f)) -> d1 / d2
+FE_TONEAREST 0x1.e38ca534ae61p-1
+FE_DOWNWARD 0x1.e38ca211bd4acp-1
+FE_UPWARD 0x1.e38ca534ae611p-1
+FE_TOWARDZERO 0x1.e38ca534ae61p-1
+
+Testing: (d1 = (1.0), d2 = (0x1.0000000000001p0)) -> d2 - d1
+FE_TONEAREST 0x1p-52
+FE_DOWNWARD 0x1p-52
+FE_UPWARD 0x1p-52
+FE_TOWARDZERO 0x1p-52
+
+Testing: (d1 = -(1.0), d2 = (0x1.0000000000001p0)) -> d2 - d1
+FE_TONEAREST 0x1p+1
+FE_DOWNWARD 0x1p+1
+FE_UPWARD 0x1.0000000000001p+1
+FE_TOWARDZERO 0x1p+1
+
+Testing: (d1 = (1.0), d2 = -(0x1.0000000000001p0)) -> d2 - d1
+FE_TONEAREST -0x1p+1
+FE_DOWNWARD -0x1.0000000000001p+1
+FE_UPWARD -0x1p+1
+FE_TOWARDZERO -0x1p+1
+
+Testing: (d1 = -(1.0), d2 = -(0x1.0000000000001p0)) -> d2 - d1
+FE_TONEAREST -0x1p-52
+FE_DOWNWARD -0x1p-52
+FE_UPWARD -0x1p-52
+FE_TOWARDZERO -0x1p-52
+
+Testing: (d1 = (1.0), d2 = (0x1.000000000000dp-4)) -> d1 + d2
+FE_TONEAREST 0x1.1000000000001p+0
+FE_DOWNWARD 0x1.1p+0
+FE_UPWARD 0x1.1000000000001p+0
+FE_TOWARDZERO 0x1.1p+0
+
+Testing: (d1 = -(1.0), d2 = (0x1.000000000000dp-4)) -> d1 + d2
+FE_TONEAREST -0x1.dfffffffffffep-1
+FE_DOWNWARD -0x1.dffffffffffffp-1
+FE_UPWARD -0x1.dfffffffffffep-1
+FE_TOWARDZERO -0x1.dfffffffffffep-1
+
+Testing: (d1 = (1.0), d2 = -(0x1.000000000000dp-4)) -> d1 + d2
+FE_TONEAREST 0x1.dfffffffffffep-1
+FE_DOWNWARD 0x1.dfffffffffffep-1
+FE_UPWARD 0x1.dffffffffffffp-1
+FE_TOWARDZERO 0x1.dfffffffffffep-1
+
+Testing: (d1 = -(1.0), d2 = -(0x1.000000000000dp-4)) -> d1 + d2
+FE_TONEAREST -0x1.1000000000001p+0
+FE_DOWNWARD -0x1.1000000000001p+0
+FE_UPWARD -0x1.1p+0
+FE_TOWARDZERO -0x1.1p+0
+
+Testing: (d1 = (1.0), d2 = (0x1.000000000000dp-4)) -> d1 - d2
+FE_TONEAREST 0x1.dfffffffffffep-1
+FE_DOWNWARD 0x1.dfffffffffffep-1
+FE_UPWARD 0x1.dffffffffffffp-1
+FE_TOWARDZERO 0x1.dfffffffffffep-1
+
+Testing: (d1 = -(1.0), d2 = (0x1.000000000000dp-4)) -> d1 - d2
+FE_TONEAREST -0x1.1000000000001p+0
+FE_DOWNWARD -0x1.1000000000001p+0
+FE_UPWARD -0x1.1p+0
+FE_TOWARDZERO -0x1.1p+0
+
+Testing: (d1 = (1.0), d2 = -(0x1.000000000000dp-4)) -> d1 - d2
+FE_TONEAREST 0x1.1000000000001p+0
+FE_DOWNWARD 0x1.1p+0
+FE_UPWARD 0x1.1000000000001p+0
+FE_TOWARDZERO 0x1.1p+0
+
+Testing: (d1 = -(1.0), d2 = -(0x1.000000000000dp-4)) -> d1 - d2
+FE_TONEAREST -0x1.dfffffffffffep-1
+FE_DOWNWARD -0x1.dffffffffffffp-1
+FE_UPWARD -0x1.dfffffffffffep-1
+FE_TOWARDZERO -0x1.dfffffffffffep-1
+
+Testing: (d1 = (1.0), d2 = (0x1.000000000000dp-4)) -> d2 - d1
+FE_TONEAREST -0x1.dfffffffffffep-1
+FE_DOWNWARD -0x1.dffffffffffffp-1
+FE_UPWARD -0x1.dfffffffffffep-1
+FE_TOWARDZERO -0x1.dfffffffffffep-1
+
+Testing: (d1 = -(1.0), d2 = (0x1.000000000000dp-4)) -> d2 - d1
+FE_TONEAREST 0x1.1000000000001p+0
+FE_DOWNWARD 0x1.1p+0
+FE_UPWARD 0x1.1000000000001p+0
+FE_TOWARDZERO 0x1.1p+0
+
+Testing: (d1 = (1.0), d2 = -(0x1.000000000000dp-4)) -> d2 - d1
+FE_TONEAREST -0x1.1000000000001p+0
+FE_DOWNWARD -0x1.1000000000001p+0
+FE_UPWARD -0x1.1p+0
+FE_TOWARDZERO -0x1.1p+0
+
+Testing: (d1 = -(1.0), d2 = -(0x1.000000000000dp-4)) -> d2 - d1
+FE_TONEAREST 0x1.dfffffffffffep-1
+FE_DOWNWARD 0x1.dfffffffffffep-1
+FE_UPWARD 0x1.dffffffffffffp-1
+FE_TOWARDZERO 0x1.dfffffffffffep-1
+
+Testing X87 instruction: "FSQRT" (ST0 = 0x1p+2, ST1 = 0x0p+0)
+FE_TONEAREST ST0 = 0x1p+1
+FE_DOWNWARD ST0 = 0x1p+1
+FE_UPWARD ST0 = 0x1p+1
+FE_TOWARDZERO ST0 = 0x1p+1
+
+Testing X87 instruction: "FSQRT" (ST0 = 0x1.0000000000001p+1, ST1 = 0x0p+0)
+FE_TONEAREST ST0 = 0x1.6a09e667f3bcdp+0
+FE_DOWNWARD ST0 = 0x1.6a09e667f3bcdp+0
+FE_UPWARD ST0 = 0x1.6a09e667f3bcep+0
+FE_TOWARDZERO ST0 = 0x1.6a09e667f3bcdp+0
+
+Testing X87 instruction: "FSQRT" (ST0 = 0x1.123456789abcp+31, ST1 = 0x0p+0)
+FE_TONEAREST ST0 = 0x1.76b0aac9e6a5p+15
+FE_DOWNWARD ST0 = 0x1.76b0aac9e6a4fp+15
+FE_UPWARD ST0 = 0x1.76b0aac9e6a5p+15
+FE_TOWARDZERO ST0 = 0x1.76b0aac9e6a4fp+15
+
+Testing X87 instruction: "FSQRT" (ST0 = 0x1.123456789abdp+31, ST1 = 0x0p+0)
+FE_TONEAREST ST0 = 0x1.76b0aac9e6a5bp+15
+FE_DOWNWARD ST0 = 0x1.76b0aac9e6a5ap+15
+FE_UPWARD ST0 = 0x1.76b0aac9e6a5bp+15
+FE_TOWARDZERO ST0 = 0x1.76b0aac9e6a5ap+15
+
diff --git a/tests/roundtest.h b/tests/roundtest.h
new file mode 100644
index 00000000..320e22c9
--- /dev/null
+++ b/tests/roundtest.h
@@ -0,0 +1,114 @@
+#pragma STDC FENV_ACCESS ON
+#include <assert.h>
+#include <stdint.h>
+
+#ifdef USE_ASM_ROUNDING
+int fesetround_(int rounding_direction) {
+  uint16_t old_cw;
+  __asm__("FNSTCW %0" : "=m"(old_cw)::);
+  uint16_t new_cw = (old_cw & ~0xc00) | rounding_direction;
+  __asm__("FLDCW %0" ::"m"(new_cw));
+  return old_cw & 0xc00;
+}
+int fegetround_() {
+  uint16_t cw;
+  __asm__("FNSTCW %0" : "=m"(cw)::);
+  return cw & 0xc00;
+}
+#define fesetround fesetround_
+#define fegetround fegetround_
+#define FE_TONEAREST 0
+#define FE_DOWNWARD 0x400
+#define FE_UPWARD 0x800
+#define FE_TOWARDZERO 0xc00
+#else
+#include <fenv.h>
+#endif
+
+#define FE_TONEAREST_INDEX 0
+#define FE_DOWNWARD_INDEX 1
+#define FE_UPWARD_INDEX 2
+#define FE_TOWARDZERO_INDEX 3
+int FE_MODES[] = {FE_TONEAREST, FE_DOWNWARD, FE_UPWARD, FE_TOWARDZERO};
+char *FE_MODES_STR[] = {
+    "FE_TONEAREST",
+    "FE_DOWNWARD",
+    "FE_UPWARD",
+    "FE_TOWARDZERO",
+};
+
+void assert_round(double *array) {
+  assert(array[FE_DOWNWARD_INDEX] <= array[FE_TONEAREST_INDEX]);
+  assert(array[FE_TONEAREST_INDEX] <= array[FE_UPWARD_INDEX]);
+  if (array[FE_TOWARDZERO_INDEX] < 0)
+    assert(array[FE_TOWARDZERO_INDEX] == array[FE_UPWARD_INDEX]);
+  else if (array[FE_TOWARDZERO_INDEX] > 0)
+    assert(array[FE_TOWARDZERO_INDEX] == array[FE_DOWNWARD_INDEX]);
+  else if (array[FE_TOWARDZERO_INDEX] == 0)
+    assert(array[FE_TOWARDZERO_INDEX] == array[FE_UPWARD_INDEX] ||
+           array[FE_TOWARDZERO_INDEX] == array[FE_DOWNWARD_INDEX]);
+}
+
+#define TEST_(exec, expr, format) \
+  do { \
+    if (sizeof(#exec) == 1) \
+      printf("Testing: %s\n", #expr); \
+    else \
+      printf("Testing: %s -> %s\n", #exec, #expr); \
+    for (int i = 0; i < sizeof(FE_MODES) / sizeof(FE_MODES[0]); i++) { \
+      fesetround(FE_MODES[i]); \
+      exec; \
+      printf("%-15s" format "\n", FE_MODES_STR[i], expr); \
+      assert(FE_MODES[i] == fegetround()); \
+    } \
+    printf("\n"); \
+  } while (0)
+
+#define TEST(exec, expr) TEST_(exec, expr, "%a")
+
+#if defined(i386) || defined(__i386__) || defined(__i386) || \
+    defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
+#define TEST_X87(instruction, st0, st1, deep_change) \
+  do { \
+    double _st0 = (st0), _st1 = (st1); \
+    double array1[4], array2[4]; \
+    double __st0, __st1; \
+    printf("Testing X87 instruction: %s (ST0 = %a, ST1 = %a)\n", #instruction, \
+           _st0, _st1); \
+    for (int i = 0; i < sizeof(FE_MODES) / sizeof(FE_MODES[0]); i++) { \
+      fesetround(FE_MODES[i]); \
+      __st0 = _st0, __st1 = _st1; \
+      switch (deep_change) { \
+      case -1: /* the instruction pops */ \
+        __asm__(instruction : "+t"(__st0) : "u"(__st1) : "st(1)"); \
+        printf("%-15s ST0 = %a\n", FE_MODES_STR[i], __st0); \
+        break; \
+      case 0: \
+        __asm__(instruction : "+t"(__st0) : "u"(__st1) :); \
+        printf("%-15s ST0 = %a\n", FE_MODES_STR[i], __st0); \
+        break; \
+      case 1: /* the instruction pushes */ \
+        __asm__(instruction : "+t"(__st0), "=u"(__st1)::); \
+        printf("%-15s ST0 = %a, ST1 = %a\n", FE_MODES_STR[i], __st0, __st1); \
+        array2[i] = __st1; \
+      } \
+      array1[i] = __st0; \
+      assert(FE_MODES[i] == fegetround()); \
+    } \
+    if (deep_change == 1) \
+      assert_round(array2); \
+    assert_round(array1); \
+    printf("\n"); \
+  } while (0)
+#else
+#define TEST_X87(instruction, st0, st1, deep_change) \
+  do { \
+    double _st0 = (st0), _st1 = (st1); \
+    printf("Cannot test X87 instruction: %s (ST0 = %a, ST1 = %a) because it " \
+           "is not compiled for x86\n\n", \
+           #instruction, _st0, _st1); \
+  } while (0)
+#endif
+
+#define TEST_X87_1(i, st0) TEST_X87(i, st0, 0.0, 0)
+#define TEST_X87_2(i, st0, st1) TEST_X87(i, st0, st1, -1)
diff --git a/tests/test26 b/tests/test26
new file mode 100755
index 00000000..4efc881e
Binary files /dev/null and b/tests/test26 differ
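Note (editorial, not part of the patch): the FE_* values roundtest.h defines under USE_ASM_ROUNDING (0, 0x400, 0x800, 0xc00) are exactly the x87 control-word rounding-control bits 11:10 that FNSTCW/FLDCW read and write. An emulator that honors them on ARM, which is what BOX86_DYNAREC_FASTROUND=0 asks for, has to translate this encoding into the VFP FPSCR RMode field (bits 23:22), and the two ISAs swap the codes for the directed modes. A minimal sketch of that translation, with a helper name (map_x87_round_to_arm) that is illustrative only, not box86's API:

#include <stdint.h>

/* x87 RC (bits 11:10):        00 nearest, 01 down, 10 up, 11 toward zero.
 * ARM FPSCR RMode (bits 23:22): 00 nearest, 01 toward +inf, 10 toward -inf,
 * 11 toward zero. Nearest and toward-zero match; down and up are swapped. */
uint32_t map_x87_round_to_arm(uint16_t x87_cw) {
  switch ((x87_cw >> 10) & 3) {
  case 1:  return 2u << 22; /* FE_DOWNWARD (0x400)   -> ARM RM */
  case 2:  return 1u << 22; /* FE_UPWARD (0x800)     -> ARM RP */
  case 3:  return 3u << 22; /* FE_TOWARDZERO (0xc00) -> ARM RZ */
  default: return 0u << 22; /* FE_TONEAREST (0)      -> ARM RN */
  }
}

Getting the down/up swap wrong would still pass the FE_TONEAREST and FE_TOWARDZERO cases, which is one reason the reference output above exercises all four modes.
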
diff --git a/tests/test26.c b/tests/test26.c
new file mode 100644
index 00000000..f2097a18
--- /dev/null
+++ b/tests/test26.c
@@ -0,0 +1,116 @@
+#include <inttypes.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#define USE_ASM_ROUNDING
+#include "roundtest.h"
+
+// Build with
+// `gcc -march=core2 -O0 -m32 test26.c -o test26 -std=c99 -masm=intel
+// -mfpmath=387 -frounding-math`
+
+#define TEST_CONVERT_(stype, s_) \
+  do { \
+    stype s; \
+    TEST_(s = (s_), (double)s, "%a"); \
+    TEST_(s = (s_), (float)s, "%a"); \
+    /* when converting a float too large for the integer type, the result \
+     * is undefined, both in C99 and for the FISTP instruction */ \
+    if (INT64_MIN <= s && s <= INT64_MAX) \
+      TEST_(s = (s_), (int64_t)s, "%" PRId64); \
+    if (INT32_MIN <= s && s <= INT32_MAX) \
+      TEST_(s = (s_), (int32_t)s, "%" PRId32); \
+    if (INT16_MIN <= s && s <= INT16_MAX) \
+      TEST_(s = (s_), (int16_t)s, "%" PRId16); \
+    if (INT8_MIN <= s && s <= INT8_MAX) \
+      TEST_(s = (s_), (int8_t)s, "%" PRId8); \
+    if (0 <= s && s <= UINT64_MAX) \
+      TEST_(s = (s_), (uint64_t)s, "%" PRIu64); \
+    if (0 <= s && s <= UINT32_MAX) \
+      TEST_(s = (s_), (unsigned int)s, "%" PRIu32); \
+    if (0 <= s && s <= UINT16_MAX) \
+      TEST_(s = (s_), (unsigned short)s, "%" PRIu16); \
+    if (0 <= s && s <= UINT8_MAX) \
+      TEST_(s = (s_), (unsigned char)s, "%" PRIu8); \
+  } while (0)
+
+#define TEST_CONVERT(stype, s_) \
+  do { \
+    TEST_CONVERT_(stype, s_); \
+    TEST_CONVERT_(stype, -(s_)); \
+  } while (0)
+
+#define TEST_2NUMBER(d1type, d1_, d2type, d2_, operation) \
+  do { \
+    d1type d1; \
+    d2type d2; \
+    TEST((d1 = (d1_), d2 = (d2_)), operation); \
+    TEST((d1 = -(d1_), d2 = (d2_)), operation); \
+    TEST((d1 = (d1_), d2 = -(d2_)), operation); \
+    TEST((d1 = -(d1_), d2 = -(d2_)), operation); \
+  } while (0)
+
+int main() {
+  double d;
+  float f;
+  int64_t i64;
+  TEST_CONVERT(double, 0x1.123456789abcp2); // FISTTP
+  TEST_(d = (0x1.123456789abcp512), (float)d, "%a");
+  TEST_CONVERT(double, 0x1.123456789abcp29);
+  TEST_(d = (-0x1.123456789abcp30), (int32_t)d, "%" PRId32);
+  TEST_(d = (-0x1.123456789abcp62), (int64_t)d, "%" PRId64);
+
+  TEST_CONVERT(float, 0x1.123456789abcp2f);
+  TEST_CONVERT(float, 0x1.123456789abcp29f);
+  TEST_(f = -0x1.123456789abcp30f, (int32_t)f, "%" PRId32);
+  // to be fixed:
+  //TEST_(f = -0x1.123456789abcp62f, (int64_t)f, "%" PRId64);
+  // The direction of rounding when an integer is converted to a floating-point
+  // number that cannot exactly represent the original value is
+  // implementation-defined:
+  // https://gcc.gnu.org/onlinedocs/gcc/Floating-point-implementation.html
+  // to be fixed:
+  //TEST_(i64 = INT64_MAX, (double)i64, "%a"); // FILD and FSTP
+  TEST(d = -0x1.1234567p0, (double)((int)d));
+  TEST(d = 0x1.9234567p0, (double)((int)d));
+  TEST(d = -0x1.9234567p0, (double)((int)d));
+
+  TEST(d = 0x1.1234567p0, (double)((long int)d));
+  TEST(d = -0x1.1234567p0, (double)((long int)d));
+  TEST(d = 0x1.9234567p0, (double)((long int)d));
+  TEST(d = -0x1.9234567p0, (double)((long int)d));
+
+  TEST_2NUMBER(double, 1.0, double, 0x1.0000000000001p0, d1 + d2);
+  TEST_2NUMBER(double, 1.0, double, 0x1.0000000000001p0, d1 - d2);
+  TEST_2NUMBER(double, 1.0, double, 0x1.0000000000001p0, d2 - d1);
+  TEST_2NUMBER(double, 1.0, double, 0x1.000000000000dp-4, d1 + d2);
+  TEST_2NUMBER(double, 1.0, double, 0x1.000000000000dp-4, d1 - d2);
+  TEST_2NUMBER(double, 1.0, double, 0x1.000000000000dp-4, d2 - d1);
+
+  TEST_2NUMBER(double, 0x1.233445566778p0, double, 0x1.3456789abcdep0, d1 + d2);
+  TEST_2NUMBER(float, 0x1.233445566778p0f, float, 0x1.3456789abcdep0f, d1 *d2);
+  TEST_2NUMBER(float, 0x1.233445566778p0f, double, 0x1.3456789abcdep0, d1 *d2);
+  TEST_2NUMBER(double, 0x1.233445566778p0, double, 0x1.3456789abcdep0, d1 *d2);
+  TEST_2NUMBER(float, 0x1.233445566778p0f, double, 0x1.3456789abcdep0, d1 *d2);
+  TEST_2NUMBER(double, 0x1.233445566778p0, float, 0x1.3456789abcdep0, d1 *d2);
+  TEST_2NUMBER(float, 0x1.233445566778p0f, float, 0x1.3456789abcdep0f, d1 *d2);
+  TEST_2NUMBER(double, 0x1.233445566778p0, int, 5, d1 *d2);
+  TEST_2NUMBER(int, 15, double, 0x1.3456789abcdep0f, d1 *d2);
+  TEST_2NUMBER(float, 0x1.233445566778p0f, int, 15, d1 *d2);
+  TEST_2NUMBER(int, 15, float, 0x1.3456789abcdep0f, d1 *d2);
+
+  TEST_2NUMBER(double, 0x1.233445566778p0, double, 0x1.3456789abcdep0, d1 / d2);
+  TEST_2NUMBER(double, 0x1.233445566778p0, double, 0x1.3456789abcdep0, d1 / d2);
+  TEST_2NUMBER(double, 0x1.233445566778p0, float, 0x1.3456789abcdep0f, d1 / d2);
+
+  TEST_2NUMBER(double, 1.0, double, 0x1.0000000000001p0, d2 - d1);
+  TEST_2NUMBER(double, 1.0, double, 0x1.000000000000dp-4, d1 + d2);
+  TEST_2NUMBER(double, 1.0, double, 0x1.000000000000dp-4, d1 - d2);
+  TEST_2NUMBER(double, 1.0, double, 0x1.000000000000dp-4, d2 - d1);
+
+  TEST_X87_1("FSQRT", 0x1.0000000000000p2);
+  TEST_X87_1("FSQRT", 0x1.0000000000001p1);
+  TEST_X87_1("FSQRT", 0x1.123456789abcp31);
+  TEST_X87_1("FSQRT", 0x1.123456789abdp31);
+
+  return 0;
+}
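
Editorial postscript, not part of the patch: besides honoring the rounding mode, the other edge case the BOX86_DYNAREC_FASTROUND setting governs is the x86 result for out-of-range conversions. A hedged standalone sketch of the host-visible difference; the cast below is undefined behavior in C99, so what it prints is a property of the host ISA rather than of the language:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
  volatile double big = 1e300; /* far outside int32_t range */
  /* On x86, CVTTSD2SI/FISTP produce the "integer indefinite" value
   * 0x80000000 (INT32_MIN) for NaN, infinities, and overflow; a plain ARM
   * VFP conversion saturates instead (INT32_MAX for large positive input),
   * which is the discrepancy the non-fast path must correct. */
  printf("%" PRId32 "\n", (int32_t)big);
  return 0;
}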