Merge pull request #7956 from unknownbrackets/arm64-minor

Unknown's min/max unordered-float fix for ARM64
This commit is contained in:
Unknown W. Brackets 2015-09-12 11:59:05 -07:00
commit 0c1a5324fb
3 changed files with 52 additions and 0 deletions

View File

@ -1410,6 +1410,10 @@ void ARM64XEmitter::BICS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shif
{ {
EncodeLogicalInst(7, Rd, Rn, Rm, Shift); EncodeLogicalInst(7, Rd, Rn, Rm, Shift);
} }
// Emits a flag-setting AND of Rn and Rm (with Shift applied via ANDS).
// The destination passed to ANDS is ZR or WZR, picked to match Rn's width,
// so no general-purpose register is named as the destination here.
void ARM64XEmitter::TST(ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift)
{
	const ARM64Reg discardReg = Is64Bit(Rn) ? ZR : WZR;
	ANDS(discardReg, Rn, Rm, Shift);
}
void ARM64XEmitter::MOV(ARM64Reg Rd, ARM64Reg Rm, ArithOption Shift) { void ARM64XEmitter::MOV(ARM64Reg Rd, ARM64Reg Rm, ArithOption Shift) {
ORR(Rd, Is64Bit(Rd) ? ZR : WZR, Rm, Shift); ORR(Rd, Is64Bit(Rd) ? ZR : WZR, Rm, Shift);

View File

@ -547,6 +547,7 @@ public:
void EON(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift); void EON(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
void ANDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift); void ANDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
void BICS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift); void BICS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
// Test bits: takes no Rd; the implementation forwards to ANDS with ZR/WZR as the destination.
void TST(ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
// Wrap the above for saner syntax // Wrap the above for saner syntax
void AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { AND(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); } void AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { AND(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
@ -557,6 +558,7 @@ public:
void EON(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { EON(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); } void EON(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { EON(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
void ANDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { ANDS(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); } void ANDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { ANDS(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
void BICS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { BICS(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); } void BICS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { BICS(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
// Unshifted TST: forwards to the three-argument overload with an ST_LSL/0 option.
// There is no Rd here, so the ArithOption is built from ZR or WZR according to Rn's width.
void TST(ARM64Reg Rn, ARM64Reg Rm) {
	const ARM64Reg widthReg = Is64Bit(Rn) ? ZR : WZR;
	TST(Rn, Rm, ArithOption(widthReg, ST_LSL, 0));
}
// Convenience wrappers around ORR. These match the official convenience syntax. // Convenience wrappers around ORR. These match the official convenience syntax.
void MOV(ARM64Reg Rd, ARM64Reg Rm, ArithOption Shift); void MOV(ARM64Reg Rd, ARM64Reg Rm, ArithOption Shift);

View File

@ -654,12 +654,58 @@ namespace MIPSComp {
switch ((op >> 23) & 7) { switch ((op >> 23) & 7) {
case 2: // vmin case 2: // vmin
{ {
fp.FCMP(fpr.V(sregs[i]), fpr.V(tregs[i]));
FixupBranch unordered = B(CC_VS);
fp.FMIN(fpr.V(tempregs[i]), fpr.V(sregs[i]), fpr.V(tregs[i])); fp.FMIN(fpr.V(tempregs[i]), fpr.V(sregs[i]), fpr.V(tregs[i]));
FixupBranch skip = B();
SetJumpTarget(unordered);
// Move to integer registers, it'll be easier. Or maybe there's a simd way?
fp.FMOV(SCRATCH1, fpr.V(sregs[i]));
fp.FMOV(SCRATCH2, fpr.V(tregs[i]));
// And together to find if both have negative set.
TST(SCRATCH1, SCRATCH2);
FixupBranch cmpPositive = B(CC_PL);
// If both are negative, "min" is the greater of the two, since it has the largest mantissa.
CMP(SCRATCH1, SCRATCH2);
CSEL(SCRATCH1, SCRATCH1, SCRATCH2, CC_GE);
FixupBranch skipPositive = B();
// If either one is positive, we just want the lowest one.
SetJumpTarget(cmpPositive);
CMP(SCRATCH1, SCRATCH2);
CSEL(SCRATCH1, SCRATCH1, SCRATCH2, CC_LE);
SetJumpTarget(skipPositive);
// Now, whether negative or positive, move to the result.
fp.FMOV(fpr.V(tempregs[i]), SCRATCH1);
SetJumpTarget(skip);
break; break;
} }
case 3: // vmax case 3: // vmax
{ {
fp.FCMP(fpr.V(sregs[i]), fpr.V(tregs[i]));
FixupBranch unordered = B(CC_VS);
fp.FMAX(fpr.V(tempregs[i]), fpr.V(sregs[i]), fpr.V(tregs[i])); fp.FMAX(fpr.V(tempregs[i]), fpr.V(sregs[i]), fpr.V(tregs[i]));
FixupBranch skip = B();
SetJumpTarget(unordered);
// Move to integer registers, it'll be easier. Or maybe there's a simd way?
fp.FMOV(SCRATCH1, fpr.V(sregs[i]));
fp.FMOV(SCRATCH2, fpr.V(tregs[i]));
// And together to find if both have negative set.
TST(SCRATCH1, SCRATCH2);
FixupBranch cmpPositive = B(CC_PL);
// If both are negative, "max" is the least of the two, since it has the lowest mantissa.
CMP(SCRATCH1, SCRATCH2);
CSEL(SCRATCH1, SCRATCH1, SCRATCH2, CC_LE);
FixupBranch skipPositive = B();
// If either one is positive, we just want the highest one.
SetJumpTarget(cmpPositive);
CMP(SCRATCH1, SCRATCH2);
CSEL(SCRATCH1, SCRATCH1, SCRATCH2, CC_GE);
SetJumpTarget(skipPositive);
// Now, whether negative or positive, move to the result.
fp.FMOV(fpr.V(tempregs[i]), SCRATCH1);
SetJumpTarget(skip);
break; break;
} }
case 6: // vsge case 6: // vsge