mirror of
https://github.com/ptitSeb/box64.git
synced 2024-11-23 22:49:43 +00:00
Improved CMPSS/CMPSD opcodes, improved test17 ([DYNAREC] too, and improved MINSS/MAXSS/MINSD/MAXSD too)
This commit is contained in:
parent
f73e4193f0
commit
f5c9439f91
@ -49,7 +49,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
|
||||
uint8_t gd, ed;
|
||||
uint8_t wback;
|
||||
uint8_t u8;
|
||||
uint64_t u64;
|
||||
uint64_t u64, j64;
|
||||
int v0, v1;
|
||||
int q0;
|
||||
int d0, d1;
|
||||
@ -214,9 +214,15 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
|
||||
v0 = sse_get_reg(dyn, ninst, x1, gd);
|
||||
GETEX(v1, 0);
|
||||
// MINSD: if any input is NaN, or Ex[0]<Gx[0], copy Ex[0] -> Gx[0]
|
||||
#if 0
|
||||
d0 = fpu_get_scratch(dyn);
|
||||
FMINNMD(d0, v0, v1); // NaN handling may be slightly different, is that a problem?
|
||||
VMOVeD(v0, 0, d0, 0); // to not erase uper part
|
||||
#else
|
||||
FCMPD(v0, v1);
|
||||
B_NEXT(cLS); //Less than or equal
|
||||
VMOVeD(v0, 0, v1, 0); // to not erase uper part
|
||||
#endif
|
||||
break;
|
||||
case 0x5E:
|
||||
INST_NAME("DIVSD Gx, Ex");
|
||||
@ -234,9 +240,15 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
|
||||
v0 = sse_get_reg(dyn, ninst, x1, gd);
|
||||
GETEX(v1, 0);
|
||||
// MAXSD: if any input is NaN, or Ex[0]>Gx[0], copy Ex[0] -> Gx[0]
|
||||
#if 0
|
||||
d0 = fpu_get_scratch(dyn);
|
||||
FMAXNMD(d0, v0, v1); // NaN handling may be slightly different, is that a problem?
|
||||
VMOVeD(v0, 0, d0, 0); // to not erase uper part
|
||||
#else
|
||||
FCMPD(v0, v1);
|
||||
B_NEXT(cGE); //Greater than or equal
|
||||
VMOVeD(v0, 0, v1, 0); // to not erase uper part
|
||||
#endif
|
||||
break;
|
||||
|
||||
case 0x70:
|
||||
@ -282,19 +294,15 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
|
||||
GETGX(v0);
|
||||
GETEX(v1, 1);
|
||||
u8 = F8;
|
||||
if((u8&7)==6){
|
||||
FCMPD(v1, v0);
|
||||
} else {
|
||||
FCMPD(v0, v1);
|
||||
}
|
||||
FCMPD(v0, v1);
|
||||
switch(u8&7) {
|
||||
case 0: CSETMx(x2, cEQ); CSELx(x2, xZR, x2, cVS); break; // Equal
|
||||
case 1: CSETMx(x2, cMI); CSELx(x2, xZR, x2, cVS); break; // Less than
|
||||
case 2: CSETMx(x2, cLE); CSELx(x2, xZR, x2, cVS); break; // Less or equal
|
||||
case 0: CSETMx(x2, cEQ); break; // Equal
|
||||
case 1: CSETMx(x2, cCC); break; // Less than
|
||||
case 2: CSETMx(x2, cLS); break; // Less or equal
|
||||
case 3: CSETMx(x2, cVS); break; // NaN
|
||||
case 4: CSETMx(x2, cNE); break; // Not Equal
|
||||
case 4: CSETMx(x2, cNE); break; // Not Equal or unordered
|
||||
case 5: CSETMx(x2, cCS); break; // Greater or equal or unordered
|
||||
case 6: CSETMx(x2, cLT); break; // Greater or unordered, test inverted, N!=V so unordered or less than (inverted)
|
||||
case 6: CSETMx(x2, cHI); break; // Greater or unordered (operands not swapped: HI = C set and Z clear, true for greater-than and for unordered)
|
||||
case 7: CSETMx(x2, cVC); break; // not NaN
|
||||
}
|
||||
VMOVQDfrom(v0, 0, x2);
|
||||
|
@ -259,9 +259,15 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
|
||||
GETGX(v0);
|
||||
GETEX(v1, 0);
|
||||
// MINSS: if any input is NaN, or Ex[0]<Gx[0], copy Ex[0] -> Gx[0]
|
||||
#if 0
|
||||
d0 = fpu_get_scratch(dyn);
|
||||
FMINNMS(d0, v0, v1); // NaN handling may be slightly different, is that a problem?
|
||||
VMOVeS(v0, 0, d0, 0); // to not erase uper part
|
||||
#else
|
||||
FCMPS(v0, v1);
|
||||
B_NEXT(cLS); //Less than or equal
|
||||
VMOVeS(v0, 0, v1, 0); // to not erase uper part
|
||||
#endif
|
||||
break;
|
||||
case 0x5E:
|
||||
INST_NAME("DIVSS Gx, Ex");
|
||||
@ -278,9 +284,15 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
|
||||
GETGX(v0);
|
||||
GETEX(v1, 0);
|
||||
// MAXSS: if any input is NaN, or Ex[0]>Gx[0], copy Ex[0] -> Gx[0]
|
||||
#if 0
|
||||
d0 = fpu_get_scratch(dyn);
|
||||
FMAXNMS(d0, v0, v1); // NaN handling may be slightly different, is that a problem?
|
||||
VMOVeS(v0, 0, d0, 0); // to not erase uper part
|
||||
#else
|
||||
FCMPS(v0, v1);
|
||||
B_NEXT(cGE); //Greater than or equal
|
||||
VMOVeS(v0, 0, v1, 0); // to not erase uper part
|
||||
#endif
|
||||
break;
|
||||
|
||||
case 0x6F:
|
||||
@ -384,19 +396,15 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
|
||||
GETGX(v0);
|
||||
GETEX(v1, 1);
|
||||
u8 = F8;
|
||||
if((u8&7)==6){
|
||||
FCMPS(v1, v0);
|
||||
} else {
|
||||
FCMPS(v0, v1);
|
||||
}
|
||||
FCMPS(v0, v1);
|
||||
switch(u8&7) {
|
||||
case 0: CSETMw(x2, cEQ); CSELw(x2, xZR, x2, cVS); break; // Equal
|
||||
case 1: CSETMw(x2, cMI); CSELw(x2, xZR, x2, cVS); break; // Less than
|
||||
case 2: CSETMw(x2, cLE); CSELw(x2, xZR, x2, cVS); break; // Less or equal
|
||||
case 0: CSETMw(x2, cEQ); break; // Equal
|
||||
case 1: CSETMw(x2, cCC); break; // Less than
|
||||
case 2: CSETMw(x2, cLS); break; // Less or equal
|
||||
case 3: CSETMw(x2, cVS); break; // NaN
|
||||
case 4: CSETMw(x2, cNE); break; // Not Equal
|
||||
case 4: CSETMw(x2, cNE); break; // Not Equal or unordered
|
||||
case 5: CSETMw(x2, cCS); break; // Greater or equal or unordered
|
||||
case 6: CSETMw(x2, cLT); break; // Greater or unordered, test inverted, N!=V so unordered or less than (inverted)
|
||||
case 6: CSETMw(x2, cHI); break; // Greater or unordered (operands not swapped: HI = C set and Z clear, true for greater-than and for unordered)
|
||||
case 7: CSETMw(x2, cVC); break; // not NaN
|
||||
}
|
||||
VMOVQSfrom(v0, 0, x2);
|
||||
|
@ -218,10 +218,10 @@ int RunF20F(x64emu_t *emu, rex_t rex)
|
||||
tmp8s = 0;
|
||||
switch(tmp8u&7) {
|
||||
case 0: tmp8s=(GX->d[0] == EX->d[0]); break;
|
||||
case 1: tmp8s=isless(GX->d[0], EX->d[0]); break;
|
||||
case 2: tmp8s=islessequal(GX->d[0], EX->d[0]); break;
|
||||
case 1: tmp8s=isless(GX->d[0], EX->d[0]) && !(isnan(GX->d[0]) || isnan(EX->d[0])); break;
|
||||
case 2: tmp8s=islessequal(GX->d[0], EX->d[0]) && !(isnan(GX->d[0]) || isnan(EX->d[0])); break;
|
||||
case 3: tmp8s=isnan(GX->d[0]) || isnan(EX->d[0]); break;
|
||||
case 4: tmp8s=(GX->d[0] != EX->d[0]); break;
|
||||
case 4: tmp8s=isnan(GX->d[0]) || isnan(EX->d[0]) || (GX->d[0] != EX->d[0]); break;
|
||||
case 5: tmp8s=isnan(GX->d[0]) || isnan(EX->d[0]) || isgreaterequal(GX->d[0], EX->d[0]); break;
|
||||
case 6: tmp8s=isnan(GX->d[0]) || isnan(EX->d[0]) || isgreater(GX->d[0], EX->d[0]); break;
|
||||
case 7: tmp8s=!isnan(GX->d[0]) && !isnan(EX->d[0]); break;
|
||||
|
@ -318,10 +318,10 @@ int RunF30F(x64emu_t *emu, rex_t rex)
|
||||
tmp8s = 0;
|
||||
switch(tmp8u&7) {
|
||||
case 0: tmp8s=(GX->f[0] == EX->f[0]); break;
|
||||
case 1: tmp8s=isless(GX->f[0], EX->f[0]); break;
|
||||
case 2: tmp8s=islessequal(GX->f[0], EX->f[0]); break;
|
||||
case 1: tmp8s=isless(GX->f[0], EX->f[0]) && !(isnan(GX->f[0]) || isnan(EX->f[0])); break;
|
||||
case 2: tmp8s=islessequal(GX->f[0], EX->f[0]) && !(isnan(GX->f[0]) || isnan(EX->f[0])); break;
|
||||
case 3: tmp8s=isnan(GX->f[0]) || isnan(EX->f[0]); break;
|
||||
case 4: tmp8s=(GX->f[0] != EX->f[0]); break;
|
||||
case 4: tmp8s=isnan(GX->f[0]) || isnan(EX->f[0]) || (GX->f[0] != EX->f[0]); break;
|
||||
case 5: tmp8s=isnan(GX->f[0]) || isnan(EX->f[0]) || isgreaterequal(GX->f[0], EX->f[0]); break;
|
||||
case 6: tmp8s=isnan(GX->f[0]) || isnan(EX->f[0]) || isgreater(GX->f[0], EX->f[0]); break;
|
||||
case 7: tmp8s=!isnan(GX->f[0]) && !isnan(EX->f[0]); break;
|
||||
|
166
tests/ref17.txt
166
tests/ref17.txt
@ -1,12 +1,154 @@
|
||||
div 1, 1 => 1 / 0
|
||||
div 10, 5 => 2 / 0
|
||||
div 10, 3 => 3 / 1
|
||||
div 1, 18446744073709551615 => 0 / 1
|
||||
div 10, 18446744073709551613 => 0 / 10
|
||||
div 18446744073709551606, 18446744073709551613 => 0 / 18446744073709551606
|
||||
idiv 1, 1 => 1 / 0
|
||||
idiv 10, 5 => 2 / 0
|
||||
idiv 10, 3 => 3 / 1
|
||||
idiv 1, -1 => -1 / 0
|
||||
idiv 10, -3 => -3 / 1
|
||||
idiv -10, -3 => 3 / -1
|
||||
ucomiss 1.000000, 2.000000 => 0x202
|
||||
ucomiss 2.000000, 1.000000 => 0x203
|
||||
ucomiss 1.000000, inf => 0x202
|
||||
ucomiss inf, 1.000000 => 0x203
|
||||
ucomiss 1.000000, -inf => 0x203
|
||||
ucomiss -inf, 1.000000 => 0x202
|
||||
ucomiss 1.000000, nan => 0x247
|
||||
ucomiss nan, 1.000000 => 0x247
|
||||
ucomiss 1.000000, 1.000000 => 0x242
|
||||
ucomiss 1.000000, 1.000000 => 0x242
|
||||
ucomiss inf, inf => 0x242
|
||||
ucomiss -inf, inf => 0x202
|
||||
ucomiss inf, -inf => 0x203
|
||||
ucomiss nan, nan => 0x247
|
||||
minss 1, 2 => 1
|
||||
minss 2, 1 => 1
|
||||
minss -inf, 2 => -inf
|
||||
minss 2, -inf => -inf
|
||||
minss inf, 2 => 2
|
||||
minss 2, inf => 2
|
||||
minss nan, 2 => 2
|
||||
minss 2, nan => nan
|
||||
minss nan, 3.40282e+38 => 3.40282e+38
|
||||
minss 3.40282e+38, nan => nan
|
||||
minss -inf, 3.40282e+38 => -inf
|
||||
minss 3.40282e+38, -inf => -inf
|
||||
minss inf, 3.40282e+38 => 3.40282e+38
|
||||
minss 3.40282e+38, inf => 3.40282e+38
|
||||
maxss 1, 2 => 2
|
||||
maxss 2, 1 => 2
|
||||
maxss -inf, 2 => 2
|
||||
maxss 2, -inf => 2
|
||||
maxss inf, 2 => inf
|
||||
maxss 2, inf => inf
|
||||
maxss nan, 2 => 2
|
||||
maxss 2, nan => nan
|
||||
maxss nan, 3.40282e+38 => 3.40282e+38
|
||||
maxss 3.40282e+38, nan => nan
|
||||
maxss -inf, 3.40282e+38 => 3.40282e+38
|
||||
maxss 3.40282e+38, -inf => 3.40282e+38
|
||||
maxss inf, 3.40282e+38 => inf
|
||||
maxss 3.40282e+38, inf => inf
|
||||
cmpss 0 1.000000, 2.000000 => 0x0
|
||||
cmpss 0 2.000000, 1.000000 => 0x0
|
||||
cmpss 0 1.000000, inf => 0x0
|
||||
cmpss 0 inf, 1.000000 => 0x0
|
||||
cmpss 0 1.000000, -inf => 0x0
|
||||
cmpss 0 -inf, 1.000000 => 0x0
|
||||
cmpss 0 1.000000, nan => 0x0
|
||||
cmpss 0 nan, 1.000000 => 0x0
|
||||
cmpss 0 1.000000, 1.000000 => 0xffffffff
|
||||
cmpss 0 1.000000, 1.000000 => 0xffffffff
|
||||
cmpss 0 inf, inf => 0xffffffff
|
||||
cmpss 0 -inf, inf => 0x0
|
||||
cmpss 0 inf, -inf => 0x0
|
||||
cmpss 0 nan, nan => 0x0
|
||||
cmpss 1 1.000000, 2.000000 => 0xffffffff
|
||||
cmpss 1 2.000000, 1.000000 => 0x0
|
||||
cmpss 1 1.000000, inf => 0xffffffff
|
||||
cmpss 1 inf, 1.000000 => 0x0
|
||||
cmpss 1 1.000000, -inf => 0x0
|
||||
cmpss 1 -inf, 1.000000 => 0xffffffff
|
||||
cmpss 1 1.000000, nan => 0x0
|
||||
cmpss 1 nan, 1.000000 => 0x0
|
||||
cmpss 1 1.000000, 1.000000 => 0x0
|
||||
cmpss 1 1.000000, 1.000000 => 0x0
|
||||
cmpss 1 inf, inf => 0x0
|
||||
cmpss 1 -inf, inf => 0xffffffff
|
||||
cmpss 1 inf, -inf => 0x0
|
||||
cmpss 1 nan, nan => 0x0
|
||||
cmpss 2 1.000000, 2.000000 => 0xffffffff
|
||||
cmpss 2 2.000000, 1.000000 => 0x0
|
||||
cmpss 2 1.000000, inf => 0xffffffff
|
||||
cmpss 2 inf, 1.000000 => 0x0
|
||||
cmpss 2 1.000000, -inf => 0x0
|
||||
cmpss 2 -inf, 1.000000 => 0xffffffff
|
||||
cmpss 2 1.000000, nan => 0x0
|
||||
cmpss 2 nan, 1.000000 => 0x0
|
||||
cmpss 2 1.000000, 1.000000 => 0xffffffff
|
||||
cmpss 2 1.000000, 1.000000 => 0xffffffff
|
||||
cmpss 2 inf, inf => 0xffffffff
|
||||
cmpss 2 -inf, inf => 0xffffffff
|
||||
cmpss 2 inf, -inf => 0x0
|
||||
cmpss 2 nan, nan => 0x0
|
||||
cmpss 3 1.000000, 2.000000 => 0x0
|
||||
cmpss 3 2.000000, 1.000000 => 0x0
|
||||
cmpss 3 1.000000, inf => 0x0
|
||||
cmpss 3 inf, 1.000000 => 0x0
|
||||
cmpss 3 1.000000, -inf => 0x0
|
||||
cmpss 3 -inf, 1.000000 => 0x0
|
||||
cmpss 3 1.000000, nan => 0xffffffff
|
||||
cmpss 3 nan, 1.000000 => 0xffffffff
|
||||
cmpss 3 1.000000, 1.000000 => 0x0
|
||||
cmpss 3 1.000000, 1.000000 => 0x0
|
||||
cmpss 3 inf, inf => 0x0
|
||||
cmpss 3 -inf, inf => 0x0
|
||||
cmpss 3 inf, -inf => 0x0
|
||||
cmpss 3 nan, nan => 0xffffffff
|
||||
cmpss 4 1.000000, 2.000000 => 0xffffffff
|
||||
cmpss 4 2.000000, 1.000000 => 0xffffffff
|
||||
cmpss 4 1.000000, inf => 0xffffffff
|
||||
cmpss 4 inf, 1.000000 => 0xffffffff
|
||||
cmpss 4 1.000000, -inf => 0xffffffff
|
||||
cmpss 4 -inf, 1.000000 => 0xffffffff
|
||||
cmpss 4 1.000000, nan => 0xffffffff
|
||||
cmpss 4 nan, 1.000000 => 0xffffffff
|
||||
cmpss 4 1.000000, 1.000000 => 0x0
|
||||
cmpss 4 1.000000, 1.000000 => 0x0
|
||||
cmpss 4 inf, inf => 0x0
|
||||
cmpss 4 -inf, inf => 0xffffffff
|
||||
cmpss 4 inf, -inf => 0xffffffff
|
||||
cmpss 4 nan, nan => 0xffffffff
|
||||
cmpss 5 1.000000, 2.000000 => 0x0
|
||||
cmpss 5 2.000000, 1.000000 => 0xffffffff
|
||||
cmpss 5 1.000000, inf => 0x0
|
||||
cmpss 5 inf, 1.000000 => 0xffffffff
|
||||
cmpss 5 1.000000, -inf => 0xffffffff
|
||||
cmpss 5 -inf, 1.000000 => 0x0
|
||||
cmpss 5 1.000000, nan => 0xffffffff
|
||||
cmpss 5 nan, 1.000000 => 0xffffffff
|
||||
cmpss 5 1.000000, 1.000000 => 0xffffffff
|
||||
cmpss 5 1.000000, 1.000000 => 0xffffffff
|
||||
cmpss 5 inf, inf => 0xffffffff
|
||||
cmpss 5 -inf, inf => 0x0
|
||||
cmpss 5 inf, -inf => 0xffffffff
|
||||
cmpss 5 nan, nan => 0xffffffff
|
||||
cmpss 6 1.000000, 2.000000 => 0x0
|
||||
cmpss 6 2.000000, 1.000000 => 0xffffffff
|
||||
cmpss 6 1.000000, inf => 0x0
|
||||
cmpss 6 inf, 1.000000 => 0xffffffff
|
||||
cmpss 6 1.000000, -inf => 0xffffffff
|
||||
cmpss 6 -inf, 1.000000 => 0x0
|
||||
cmpss 6 1.000000, nan => 0xffffffff
|
||||
cmpss 6 nan, 1.000000 => 0xffffffff
|
||||
cmpss 6 1.000000, 1.000000 => 0x0
|
||||
cmpss 6 1.000000, 1.000000 => 0x0
|
||||
cmpss 6 inf, inf => 0x0
|
||||
cmpss 6 -inf, inf => 0x0
|
||||
cmpss 6 inf, -inf => 0xffffffff
|
||||
cmpss 6 nan, nan => 0xffffffff
|
||||
cmpss 7 1.000000, 2.000000 => 0xffffffff
|
||||
cmpss 7 2.000000, 1.000000 => 0xffffffff
|
||||
cmpss 7 1.000000, inf => 0xffffffff
|
||||
cmpss 7 inf, 1.000000 => 0xffffffff
|
||||
cmpss 7 1.000000, -inf => 0xffffffff
|
||||
cmpss 7 -inf, 1.000000 => 0xffffffff
|
||||
cmpss 7 1.000000, nan => 0x0
|
||||
cmpss 7 nan, 1.000000 => 0x0
|
||||
cmpss 7 1.000000, 1.000000 => 0xffffffff
|
||||
cmpss 7 1.000000, 1.000000 => 0xffffffff
|
||||
cmpss 7 inf, inf => 0xffffffff
|
||||
cmpss 7 -inf, inf => 0xffffffff
|
||||
cmpss 7 inf, -inf => 0xffffffff
|
||||
cmpss 7 nan, nan => 0x0
|
||||
|
BIN
tests/test17
BIN
tests/test17
Binary file not shown.
204
tests/test17.c
204
tests/test17.c
@ -6,45 +6,195 @@
|
||||
#include <math.h>
|
||||
|
||||
#if defined(__x86_64__)
|
||||
uint64_t _div_(uint64_t a, uint64_t b, uint64_t *r)
|
||||
uint64_t _ucomiss_(float a, float b)
|
||||
{
|
||||
uint64_t ret, rem;
|
||||
uint64_t ret;
|
||||
asm volatile (
|
||||
"xor %%rdx, %%rdx\n"
|
||||
"div %%rcx\n"
|
||||
"mov %%rdx, %%rbx\n"
|
||||
:"=a" (ret), "=b" (rem):"a" (a), "c" (b):"rdx","cc");
|
||||
*r = rem;
|
||||
"ucomiss %%xmm0, %%xmm1\n"
|
||||
"pushf\n"
|
||||
"pop %%rax"
|
||||
:"=a" (ret)::"xmm0","xmm1","cc");
|
||||
return ret;
|
||||
}
|
||||
uint64_t _idiv_(uint64_t a, uint64_t b, uint64_t *r)
|
||||
uint64_t _minss_(float a, float b)
|
||||
{
|
||||
uint64_t ret, rem;
|
||||
uint64_t ret;
|
||||
asm volatile (
|
||||
"cqo\n"
|
||||
"idiv %%rcx\n"
|
||||
"mov %%rdx, %%rbx\n"
|
||||
:"=a" (ret), "=b" (rem):"a" (a), "c" (b):"rdx","cc");
|
||||
*r = rem;
|
||||
"minss %%xmm1, %%xmm0\n"
|
||||
"movd %%xmm0, %%eax"
|
||||
:"=a" (ret)::"xmm0","xmm1","cc");
|
||||
return ret;
|
||||
}
|
||||
/*
 * Run the x86 MAXSS instruction on two floats and return the raw 32-bit
 * pattern of the result, zero-extended to 64 bits.
 *
 * a is the destination operand of MAXSS and b the source operand; the low
 * 32 bits of the destination are then read back with MOVD.
 *
 * Both values are handed to the asm statement as explicit operands, so the
 * instruction sees them however the compiler allocates registers or inlines
 * this function (the previous form assumed a and b were still sitting in
 * xmm0/xmm1 from the call). __asm__ is used instead of asm so the code also
 * builds in strict ISO modes such as -std=c11.
 */
uint64_t _maxss_(float a, float b)
{
    uint32_t ret;
    __asm__ volatile (
    "maxss %2, %1\n"    /* a = MAXSS(a, b) */
    "movd %1, %0"       /* fetch the raw bits of the result */
    :"=r" (ret), "+x" (a):"x" (b));
    return ret;
}
|
||||
/*
 * CMPSS(A) defines uint64_t _cmpss_A(float a, float b), which executes
 * "cmpss $A, b, a" (AT&T order: a is the destination) and returns the low
 * 32 bits of the destination, zero-extended to 64 bits.
 * A is the immediate predicate (0..7) and is pasted into the instruction text.
 *
 * a and b are passed as explicit asm operands instead of being assumed to
 * live in xmm0/xmm1, and __asm__ is used so the macro also works in strict
 * ISO modes such as -std=c11.
 */
#define CMPSS(A) \
uint64_t _cmpss_##A(float a, float b) \
{ \
    uint32_t ret; \
    __asm__ volatile ( \
    "cmpss $" #A ", %2, %1\n" \
    "movd %1, %0" \
    :"=r" (ret), "+x" (a):"x" (b)); \
    return ret; \
}
|
||||
#else
|
||||
/*
 * Variant used in the #else branch of "#if defined(__x86_64__)"
 * (presumably 32-bit x86, given the pop into %eax -- confirm).
 *
 * Loads a into xmm0 and b into xmm1 from memory, executes UCOMISS on them,
 * then pushes the flags register and pops it into eax.
 * Returns that flags value, widened from 32 to 64 bits.
 */
uint64_t _ucomiss_(float a, float b)
|
||||
{
|
||||
uint32_t ret;
|
||||
asm volatile (
|
||||
"movss %1, %%xmm0\n"
|
||||
"movss %2, %%xmm1\n"
|
||||
/* AT&T operand order: xmm0 (a) is the source, xmm1 (b) the other comparand */
"ucomiss %%xmm0, %%xmm1\n"
|
||||
/* read the flags back through the stack; must directly follow the compare */
"pushf\n"
|
||||
"pop %%eax"
|
||||
:"=a" (ret):"m"(a), "m"(b):"xmm0", "xmm1", "cc");
|
||||
return ret;
|
||||
}
|
||||
/*
 * Variant used in the #else branch of "#if defined(__x86_64__)".
 *
 * Loads a into xmm0 and b into xmm1 from memory, executes
 * "minss %xmm1, %xmm0" (xmm0 is the destination), and returns the low
 * 32 bits of xmm0, widened to 64 bits.
 *
 * __asm__ is used instead of asm so the function also builds in strict ISO
 * modes such as -std=c11; the instruction sequence and constraints are
 * unchanged.
 */
uint64_t _minss_(float a, float b)
{
    uint32_t ret;
    __asm__ volatile (
    "movss %1, %%xmm0\n"
    "movss %2, %%xmm1\n"
    "minss %%xmm1, %%xmm0\n"
    "movd %%xmm0, %%eax"
    :"=a" (ret):"m"(a), "m"(b):"xmm0", "xmm1", "cc");
    return ret;
}
|
||||
/*
 * Variant used in the #else branch of "#if defined(__x86_64__)".
 *
 * Loads a into xmm0 and b into xmm1 from memory, executes
 * "maxss %xmm1, %xmm0" (xmm0 is the destination), and returns the low
 * 32 bits of xmm0, widened to 64 bits.
 *
 * __asm__ is used instead of asm so the function also builds in strict ISO
 * modes such as -std=c11; the instruction sequence and constraints are
 * unchanged.
 */
uint64_t _maxss_(float a, float b)
{
    uint32_t ret;
    __asm__ volatile (
    "movss %1, %%xmm0\n"
    "movss %2, %%xmm1\n"
    "maxss %%xmm1, %%xmm0\n"
    "movd %%xmm0, %%eax"
    :"=a" (ret):"m"(a), "m"(b):"xmm0", "xmm1", "cc");
    return ret;
}
|
||||
/*
 * Variant used in the #else branch of "#if defined(__x86_64__)".
 *
 * CMPSS(A) defines uint64_t _cmpss_A(float a, float b): it loads a into xmm0
 * and b into xmm1 from memory, executes "cmpss $A, %xmm1, %xmm0" (xmm0 is the
 * destination) and returns the low 32 bits of xmm0, widened to 64 bits.
 * A is the immediate predicate (0..7) and is pasted into the instruction text.
 *
 * __asm__ is used instead of asm so the macro also works in strict ISO modes
 * such as -std=c11; the instruction sequence and constraints are unchanged.
 */
#define CMPSS(A) \
uint64_t _cmpss_##A(float a, float b) \
{ \
    uint32_t ret; \
    __asm__ volatile ( \
    "movss %1, %%xmm0\n" \
    "movss %2, %%xmm1\n" \
    "cmpss $" #A ", %%xmm1, %%xmm0\n" \
    "movd %%xmm0, %%eax" \
    :"=a" (ret):"m"(a), "m"(b):"xmm0", "xmm1", "cc"); \
    return ret; \
}
|
||||
#endif
|
||||
CMPSS(0)
|
||||
CMPSS(1)
|
||||
CMPSS(2)
|
||||
CMPSS(3)
|
||||
CMPSS(4)
|
||||
CMPSS(5)
|
||||
CMPSS(6)
|
||||
CMPSS(7)
|
||||
|
||||
int main(int argc, const char** argv)
|
||||
{
|
||||
uint64_t datas[][2] = {{1,1},{10,5},{10,3},{1, (uint64_t)-1}, {10, (uint64_t)-3}, {(uint64_t)-10, (uint64_t)-3}};
|
||||
float a, b;
|
||||
uint64_t flags;
|
||||
uint32_t maxf = 0x7f7fffff;
|
||||
uint32_t minf = 0xff7fffff;
|
||||
uint32_t r;
|
||||
|
||||
int sz = sizeof(datas)/sizeof(datas[0]);
|
||||
for(int i=0; i<sz; ++i) {
|
||||
uint64_t rem = 0;
|
||||
uint64_t d = _div_(datas[i][0], datas[i][1], &rem);
|
||||
printf("div %llu, %llu => %llu / %llu\n", datas[i][0], datas[i][1], d, rem);
|
||||
}
|
||||
for(int i=0; i<sz; ++i) {
|
||||
uint64_t rem = 0;
|
||||
uint64_t d = _idiv_(datas[i][0], datas[i][1], &rem);
|
||||
printf("idiv %lld, %lld => %lld / %lld\n", datas[i][0], datas[i][1], d, rem);
|
||||
}
|
||||
return 0;
|
||||
#define GO1(A, N) \
|
||||
a = 1.0f; b = 2.0f; \
|
||||
flags = A(a, b); \
|
||||
printf(N " %f, %f => 0x%lx\n", a, b, flags); \
|
||||
flags = A(b, a); \
|
||||
printf(N " %f, %f => 0x%lx\n", b, a, flags); \
|
||||
b = INFINITY; \
|
||||
flags = A(a, b); \
|
||||
printf(N " %f, %f => 0x%lx\n", a, b, flags); \
|
||||
flags = A(b, a); \
|
||||
printf(N " %f, %f => 0x%lx\n", b, a, flags); \
|
||||
b = -INFINITY; \
|
||||
flags = A(a, b); \
|
||||
printf(N " %f, %f => 0x%lx\n", a, b, flags); \
|
||||
flags = A(b, a); \
|
||||
printf(N " %f, %f => 0x%lx\n", b, a, flags); \
|
||||
b = NAN; \
|
||||
flags = A(a, b); \
|
||||
printf(N " %f, %f => 0x%lx\n", a, b, flags); \
|
||||
flags = A(b, a); \
|
||||
printf(N " %f, %f => 0x%lx\n", b, a, flags); \
|
||||
b = a; \
|
||||
flags = A(a, b); \
|
||||
printf(N " %f, %f => 0x%lx\n", a, b, flags); \
|
||||
flags = A(b, a); \
|
||||
printf(N " %f, %f => 0x%lx\n", b, a, flags); \
|
||||
a = b = INFINITY; \
|
||||
flags = A(a, b); \
|
||||
printf(N " %f, %f => 0x%lx\n", a, b, flags); \
|
||||
a = -INFINITY; \
|
||||
flags = A(a, b); \
|
||||
printf(N " %f, %f => 0x%lx\n", a, b, flags); \
|
||||
flags = A(b, a); \
|
||||
printf(N " %f, %f => 0x%lx\n", b, a, flags); \
|
||||
a = b = NAN; \
|
||||
flags = A(a, b); \
|
||||
printf(N " %f, %f => 0x%lx\n", a, b, flags);
|
||||
|
||||
#define GO2(A, N) \
|
||||
a = 1.0f; b = 2.0f; \
|
||||
r = A(a, b); \
|
||||
printf(N " %g, %g => %g\n", a, b, *(float*)&r); \
|
||||
r = A(b, a); \
|
||||
printf(N " %g, %g => %g\n", b, a, *(float*)&r); \
|
||||
a = -INFINITY; \
|
||||
r = A(a, b); \
|
||||
printf(N " %g, %g => %g\n", a, b, *(float*)&r); \
|
||||
r = A(b, a); \
|
||||
printf(N " %g, %g => %g\n", b, a, *(float*)&r); \
|
||||
a = +INFINITY; \
|
||||
r = A(a, b); \
|
||||
printf(N " %g, %g => %g\n", a, b, *(float*)&r); \
|
||||
r = A(b, a); \
|
||||
printf(N " %g, %g => %g\n", b, a, *(float*)&r); \
|
||||
a = NAN; \
|
||||
r = A(a, b); \
|
||||
printf(N " %g, %g => %g\n", a, b, *(float*)&r); \
|
||||
r = A(b, a); \
|
||||
printf(N " %g, %g => %g\n", b, a, *(float*)&r); \
|
||||
b = *(float*)&maxf; \
|
||||
r = A(a, b); \
|
||||
printf(N " %g, %g => %g\n", a, b, *(float*)&r); \
|
||||
r = A(b, a); \
|
||||
printf(N " %g, %g => %g\n", b, a, *(float*)&r); \
|
||||
a = -INFINITY; \
|
||||
r = A(a, b); \
|
||||
printf(N " %g, %g => %g\n", a, b, *(float*)&r); \
|
||||
r = A(b, a); \
|
||||
printf(N " %g, %g => %g\n", b, a, *(float*)&r); \
|
||||
a = +INFINITY; \
|
||||
r = A(a, b); \
|
||||
printf(N " %g, %g => %g\n", a, b, *(float*)&r); \
|
||||
r = A(b, a); \
|
||||
printf(N " %g, %g => %g\n", b, a, *(float*)&r);
|
||||
|
||||
GO1(_ucomiss_, "ucomiss")
|
||||
GO2(_minss_, "minss")
|
||||
GO2(_maxss_, "maxss")
|
||||
GO1(_cmpss_0, "cmpss 0")
|
||||
GO1(_cmpss_1, "cmpss 1")
|
||||
GO1(_cmpss_2, "cmpss 2")
|
||||
GO1(_cmpss_3, "cmpss 3")
|
||||
GO1(_cmpss_4, "cmpss 4")
|
||||
GO1(_cmpss_5, "cmpss 5")
|
||||
GO1(_cmpss_6, "cmpss 6")
|
||||
GO1(_cmpss_7, "cmpss 7")
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user