From b655e89af08e30eeef1085b35bb4dc46349101d2 Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Wed, 22 Dec 2021 11:44:11 +0100 Subject: [PATCH] [DYNAREC] Fixed idiv optimisation, and added a new test about idiv opcode --- CMakeLists.txt | 7 ++++- src/dynarec/arm_emitter.h | 8 +++++- src/dynarec/dynarec_arm_00.c | 20 +++++++-------- tests/ref20.txt | 11 ++++++++ tests/test20 | Bin 0 -> 15544 bytes tests/test20.c | 48 +++++++++++++++++++++++++++++++++++ 6 files changed, 81 insertions(+), 13 deletions(-) create mode 100644 tests/ref20.txt create mode 100755 tests/test20 create mode 100644 tests/test20.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 60292e75..ff08d1e2 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -738,11 +738,16 @@ add_test(NAME longjumpInSignals COMMAND ${CMAKE_COMMAND} -D TEST_PROGRAM=${CMAKE -D TEST_REFERENCE=${CMAKE_SOURCE_DIR}/tests/ref18.txt -P ${CMAKE_SOURCE_DIR}/runTest.cmake ) - add_test(NAME x87 COMMAND ${CMAKE_COMMAND} -D TEST_PROGRAM=${CMAKE_BINARY_DIR}/${BOX86} +add_test(NAME x87 COMMAND ${CMAKE_COMMAND} -D TEST_PROGRAM=${CMAKE_BINARY_DIR}/${BOX86} -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests/test19 -D TEST_OUTPUT=tmpfile.txt -D TEST_REFERENCE=${CMAKE_SOURCE_DIR}/tests/ref19.txt -P ${CMAKE_SOURCE_DIR}/runTest.cmake ) +add_test(NAME idiv COMMAND ${CMAKE_COMMAND} -D TEST_PROGRAM=${CMAKE_BINARY_DIR}/${BOX86} + -D TEST_ARGS=${CMAKE_SOURCE_DIR}/tests/test20 -D TEST_OUTPUT=tmpfile.txt + -D TEST_REFERENCE=${CMAKE_SOURCE_DIR}/tests/ref20.txt + -P ${CMAKE_SOURCE_DIR}/runTest.cmake ) + file(GLOB extension_tests "${CMAKE_SOURCE_DIR}/tests/extensions/*.c") foreach(file ${extension_tests}) get_filename_component(testname "${file}" NAME_WE) diff --git a/src/dynarec/arm_emitter.h b/src/dynarec/arm_emitter.h index dbd93429..cd01b17e 100755 --- a/src/dynarec/arm_emitter.h +++ b/src/dynarec/arm_emitter.h @@ -245,9 +245,12 @@ Op is 20-27 // cmp.s dst, src, #imm #define CMPS_IMM8(src, imm8) \ EMIT(0xe3500000 | ((0) << 12) | ((src) << 16) | brIMM(imm8) ) -// cmn.s dst, src, #imm +// cmn.s.cond dst, src, #imm #define CMNS_IMM8_COND(cond, src, imm8) \ EMIT((cond) | 0x03700000 | ((0) << 12) | ((src) << 16) | brIMM(imm8) ) +// cmn.s dst, src, #imm +#define CMNS_IMM8(src, imm8) \ + EMIT(c__ | 0x03700000 | ((0) << 12) | ((src) << 16) | brIMM(imm8) ) // tst.s dst, src1, src2, lsl #imm #define TSTS_REG_LSL_IMM5(src1, src2, imm5) \ EMIT(0xe1100000 | ((0) << 12) | ((src1) << 16) | brLSL(imm5, src2) ) @@ -257,6 +260,9 @@ Op is 20-27 // tst.s dst, src1, #imm ror rot*2 #define TSTS_IMM8_ROR(src, imm8, rot) \ EMIT(0xe3100000 | ((0) << 12) | ((src) << 16) | ((rot)<<8) | brIMM(imm8) ) +// tst.s.cond dst, src1, #imm ror rot*2 +#define TSTS_IMM8_ROR_COND(cond, src, imm8, rot) \ + EMIT((cond) | 0x03100000 | ((0) << 12) | ((src) << 16) | ((rot)<<8) | brIMM(imm8) ) // orr dst, src1, src2, lsl #imm #define ORR_REG_LSL_IMM5(dst, src1, src2, imm5) \ EMIT(0xe1800000 | ((dst) << 12) | ((src1) << 16) | brLSL(imm5, src2) ) diff --git a/src/dynarec/dynarec_arm_00.c b/src/dynarec/dynarec_arm_00.c index 96084ab4..fdbaa8bc 100755 --- a/src/dynarec/dynarec_arm_00.c +++ b/src/dynarec/dynarec_arm_00.c @@ -2683,31 +2683,29 @@ uintptr_t dynarec00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, GETED; SDIV(x2, xEAX, ed); // x1 = xEAX / ed MLS(xEDX, x2, ed, xEAX); // x14 = xEAX mod ed (i.e. xEAX - x1*ed) - TSTS_IMM8_ROR(xEDX, 0b10, 1); // test if reminder is negative - ADD_REG_LSL_IMM5_COND(cNE, xEDX, xEDX, ed, 0); // add ed if negative MOV_REG(xEAX, x2); } else { GETEDH(x1); - // disabling the use of SDIV for now. It breaks X3Reunion - // seems to be an issue with large number - // for example EDX=0xffffffff EAX=0xe7f30000 and div ecx=0x186a0 - // gives wrong modulo - if(arm_div && 0) { + if(arm_div) { + // check if a 32bits division is enough CMPS_IMM8(xEDX, 0); // compare to 0 - CMNS_IMM8_COND(cNE, xEDX, 1); // compare to FFFFFFFF if not 0 + TSTS_IMM8_ROR_COND(cEQ, xEAX, 0b10, 1); // also test that xEAX is not signed! B_MARK(cEQ); + CMNS_IMM8(xEDX, 1); // compare to FFFFFFFF if not 0 + B_MARK2(cNE); + TSTS_IMM8_ROR(xEAX, 0b10, 1); // also test that xEAX is signed! + B_MARK(cNE); + MARK2; } if(ed!=x1) {MOV_REG(x1, ed);} STM(xEmu, (1< 0x0 / 0x9000 (47185920/1280 => 36864 + 0) +SDHLD/SAR/IDIV 0xfffffa5d 0x186a0 => 0xfffef4a0 / 0xfffffc4f (281474882142208/100000 => -945 + -68448) +SDHLD/SAR/IDIV 0x1701 0x186a0 => 0xa220 / 0xf13 (385941504/100000 => 3859 + 41504) +SDHLD/SAR/IDIV 0xffff9a19 0x186a0 => 0xffff6d00 / 0xffffbd38 (281473267073024/100000 => -17096 + -37632) +SDHLD/SAR/IDIV 0xffffe7f3 0x186a0 => 0xffffebe0 / 0xfffff03d (281474573205504/100000 => -4035 + -5152) +SDHLD/SAR/IDIV 0x15840 0x186a0 => 0xdd20 / 0xe19b (5775556608/100000 => 57755 + 56608) +SDHLD/SAR/IDIV 0xff451330 0x186a0 => 0xfffee8a0 / 0xff857f2f (280672139739136/100000 => -8028369 + -71520) +SDHLD/SAR/IDIV 0xffff626a 0x186a0 => 0xffff2fc0 / 0xffff98ba (281472332857344/100000 => -26438 + -53312) +SDHLD/SAR/IDIV 0x9120 0x186a0 => 0x16d20 / 0x5f1b (2434793472/100000 => 24347 + 93472) + +Done diff --git a/tests/test20 b/tests/test20 new file mode 100755 index 0000000000000000000000000000000000000000..d329fa9d6fa8a025ef13eb5a1b0762440adbcfa1 GIT binary patch literal 15544 zcmeHOe{ft?6~6m}HqyA+mWcdlUA{6mnbRK_}kqhry*am2I>r810Ds?Ol(jCFuvD^XIZ5FzsXzW3e@ zFE$P{{-^U!=Dc&yJ?EZ#?|1Lp-22|U_eW!GE|*J4@(7Qh7F;aE4Uly=nwA@cA{L32 z;!|RoSb!|rB=1B4vKM(|5BMN-$(KMU(}rw=A!g|n6d=vVmo?iCin-h;Ox$3b;TUeg)8G}9~`m?_z$~rLZ29}CYkTmkX|Akj4n6}dn zo1!5737ftI>7_P(Inu0K6774W0;zznx9PO+x9JMfY?l`a_G2-lsU5egc`cF3sD*qo zAw~*XUX0{Y8ErtQYJV}V4x}>ibn2cYRAAN*=Xg$2hvTV?pib=8iH)(24Xx_hKuh4$ zv*{pW_iR!V$y{`Jc^%PEpz1M@CAu#Bxy3C= zNi6bLr9Q?{*l6YXQ>l-$OgD5A=P1WyTFTo@eQC&)lh79cm?=sO=6OmC-VEhJ%p3SE zMAd#u0M|%qOmyLpgMfp8gMfp8gMfp8gMfp8gMfp8gTNdFx=WLFwLxq!Myu~O3sEke z*L8j0gyz;ym7c5HUe%9sH>Mxv9)+YIuE%wV$+KtRsvqV~XNcyrrwpCD)FEb`oiOw% zTnEOX8&rukc}uTN>0g@JI`(>RC0g57`O2a0+UqcmRtspsmY;HKSeh)3MGsU;b*N)3 zy1!CdiA3e-PD6e0?I$aRLzSN}VEL=w(xkU^qOROF-P@@b_K!sm)_PIp9bfF#3kPcl z(6z3yms?JEjz!1I(SyAW-#P9-cE;ZY-|;Pa;lbKcDu*U+xGD4|M%}d1({M$-lYUz5~9K3sfrl1N@xVBv4P@+AH0h4}|yZ_oU^bj#> zpMm8>iH@$h@y>3CrQ9eX7;l*<_cVFSv8K9)r%w5gz2|@TAJv9M5*^fE+m0Zk&)<#8 zd$(bB){nhezOQLzt^1#t0NY-oEj?Gy5v*2ri)wjwQx!EB^F9l&(W<^0{p4InwnGjA4gwAW4gwAW4gwAW4gwAW z4gwAW|F;O>m7?*^vOCfqi!^tKyP7*99ov*(aYfO*h>Ih5FDh=i)l{2Jv2sN^ooHT> zP>_%7^|-9Mcs`I$@wF)4SMGRE*AG2}_oJT_g7Wfjn)=&sm@;&S%g7I25;gqp?lbe1 zubcj^D?RJm5sD8Qif`#UA1FKn`84E7$Ui{t$Nn@y{hY3!gsed-*#EcxP1nmf+tt84Ssf4*Un z<}Hfbu2_HF+NR}(xW;??u&-{PJLao@*c0~o zAM%EMjirlPeM-vL6!xu*_*O@J!A(A8qpvYs_4&gmeD&cMe0AaHeR$l%Hco?ogIIve zAm*V;aSVBUBIJ;RfP;X8fP;X8fP;X8fP;X8fP;X8!2bdQJMo6u59Z#6-|F&>Ha9)^ zFjq_?|8huvmwP27_c~WFA3FDY)c^j0o}ENtY-`11*x&MV+j>ZDfS&*_0&~A4znA77 zo0~kih|9p-Q{wY_fvvEZfRX9FkbLnS0`psW?ib(J^(>QkOPTG29dA;zY%{iC*;}Dw zOP&31_hKV#Ac)_;OInA#Ht;sfwlswA$Xi=)R#rxmeW`dxX$c1K8TFcVtE`l=Hn_GW z*wPY|5#~yM|4%Yc6qw)Bn>vvY;-y(TIs+o6B#(iziK(d*X%lNxH$QMD2BwY=p+Urm zi~b%&A$sS&(y#T=`;J8a`ypljUFiS6*Z(-|pIc6EmbVI;DrUzyozFUCeBLoXhc5AQ0k0F# zWqjIy2c73NOj;}^nFyJh3%&EZk+#O$jrrh4N2X9M`?Cb?U19@FL0$m*2=L(jx5Vue zPePaYE5JWu^JnX~LO*A9gZ{gq6EBh<{(QC~=hG5*gUCTo+WhxJmqb_&Lznfj{10t* z7W;)wSD^n2I`3!FV)td@_t3qH*?yk2__NKQ;9{hPm zh|kGrzm)hXAI4h-T?ru%ME_OLdH*~Mo&Euvo$a|9x+DdO9{Qtx&UE?&Xhwkg|kM<+=vKdvRX1Qm?;GM3Mrh- zN+qnU@P>{xT6|C#wxM`_NCXmlGKgnNEoWMGCv*8!HZw<2Vap}c^fwbDX-x!-ZUvAG z3}&Hf$s(?tn()aQ;}S>?S(9-nfe=zNi<-02EFsgVAU>SxN99?Kk_mvk4H>n{3n zy+P*hqpX+tHWd2vD zAO9CizXM3Kt*i$>v<53oTSPz7!(i#Bz>gfF!6Fu=?;~L8w-cFUwnq}vHpyON7y+Q^y7Mj=S-)pJP0Ee z@%sjt&7mLHb(75(;=+1sHa}y)g62Fxwn4Ie=b*`WTo+shKh`6Oex!H7%xAqy2n;`I zLzWSPa>h!-Y!LmfwbB%eyR9H&k+{UP P`*}W#IWLW{$>hHP)s$x| literal 0 HcmV?d00001 diff --git a/tests/test20.c b/tests/test20.c new file mode 100644 index 00000000..017e925c --- /dev/null +++ b/tests/test20.c @@ -0,0 +1,48 @@ +#include +#include +#include +#include +#include +#include + +#if defined(__x86_64__) +#error Nope! +#else +__attribute__((naked)) uint64_t _idiv_(uint32_t a, uint32_t b) +{ + asm volatile ( + "xor %%eax, %%eax\n" + "mov 4(%%esp), %%edx\n" + "mov 8(%%esp), %%ecx\n" + "shrd $0x10, %%edx, %%eax\n" + "sar $0x10, %%edx\n" + "idiv %%ecx\n" + "ret" + :::); +} +#endif + +int main(int argc, const char** argv) +{ + uint32_t tests[][2] = { + {0x000002d0, 0x00000500}, + {0xfffffa5d, 0x000186a0}, + {0x00001701, 0x000186a0}, + {0xffff9a19, 0x000186a0}, + {0xffffe7f3, 0x000186a0}, + {0x00015840, 0x000186a0}, + {0xff451330, 0x000186a0}, + {0xffff626a, 0x000186a0}, + {0x00009120, 0x000186a0}, + }; + int n = sizeof(tests)/sizeof(tests[0]); + uint64_t res; + for(int i=0; i> 32; + printf("=> 0x%x / 0x%x (%lld/%d => %d + %d)\n", modo, divi, ((int64_t)tests[i][0])<<16, tests[i][1], divi, modo); + } + printf("\nDone\n"); +}