From 1fba38fd417767ffa7d870a677bd2712b7791984 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Tue, 8 Sep 2015 20:06:53 -0700 Subject: [PATCH 1/3] arm64: Add TST alias to the emitter. --- Common/Arm64Emitter.cpp | 4 ++++ Common/Arm64Emitter.h | 2 ++ 2 files changed, 6 insertions(+) diff --git a/Common/Arm64Emitter.cpp b/Common/Arm64Emitter.cpp index 3f1fa00cd8..3ffc934027 100644 --- a/Common/Arm64Emitter.cpp +++ b/Common/Arm64Emitter.cpp @@ -1410,6 +1410,10 @@ void ARM64XEmitter::BICS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shif { EncodeLogicalInst(7, Rd, Rn, Rm, Shift); } +void ARM64XEmitter::TST(ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift) +{ + ANDS(Is64Bit(Rn) ? ZR : WZR, Rn, Rm, Shift); +} void ARM64XEmitter::MOV(ARM64Reg Rd, ARM64Reg Rm, ArithOption Shift) { ORR(Rd, Is64Bit(Rd) ? ZR : WZR, Rm, Shift); diff --git a/Common/Arm64Emitter.h b/Common/Arm64Emitter.h index e437131eeb..bf7e30987f 100644 --- a/Common/Arm64Emitter.h +++ b/Common/Arm64Emitter.h @@ -547,6 +547,7 @@ public: void EON(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift); void ANDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift); void BICS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift); + void TST(ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift); // Wrap the above for saner syntax void AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { AND(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); } @@ -557,6 +558,7 @@ public: void EON(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { EON(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); } void ANDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { ANDS(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); } void BICS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { BICS(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); } + void TST(ARM64Reg Rn, ARM64Reg Rm) { TST(Rn, Rm, ArithOption(Is64Bit(Rn) ? ZR : WZR, ST_LSL, 0)); } // Convenience wrappers around ORR. These match the official convenience syntax. 
void MOV(ARM64Reg Rd, ARM64Reg Rm, ArithOption Shift); From dd548ba12c55f98266b56f2abb99a6113a0bb2d6 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Tue, 8 Sep 2015 20:07:10 -0700 Subject: [PATCH 2/3] arm64: Check for unordered in vmin/vmax. --- Core/MIPS/ARM64/Arm64CompVFPU.cpp | 46 +++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/Core/MIPS/ARM64/Arm64CompVFPU.cpp b/Core/MIPS/ARM64/Arm64CompVFPU.cpp index 4cf488f10d..ebe5f692a8 100644 --- a/Core/MIPS/ARM64/Arm64CompVFPU.cpp +++ b/Core/MIPS/ARM64/Arm64CompVFPU.cpp @@ -654,12 +654,58 @@ namespace MIPSComp { switch ((op >> 23) & 7) { case 2: // vmin { + fp.FCMP(fpr.V(sregs[i]), fpr.V(tregs[i])); + FixupBranch unordered = B(CC_VS); fp.FMIN(fpr.V(tempregs[i]), fpr.V(sregs[i]), fpr.V(tregs[i])); + FixupBranch skip = B(); + + SetJumpTarget(unordered); + // Move to integer registers, it'll be easier. Or maybe there's a simd way? + fp.FMOV(SCRATCH1, fpr.V(sregs[i])); + fp.FMOV(SCRATCH2, fpr.V(sregs[i])); + // And together to find if both have negative set. + TST(SCRATCH1, SCRATCH2); + FixupBranch cmpPositive = B(CC_PL); + // If both are negative, "min" is the greater of the two, since it has the largest mantissa. + CMP(SCRATCH1, SCRATCH2); + CSEL(SCRATCH1, SCRATCH1, SCRATCH2, CC_GE); + FixupBranch skipPositive = B(); + // If either one is positive, we just want the lowest one. + SetJumpTarget(cmpPositive); + CMP(SCRATCH1, SCRATCH2); + CSEL(SCRATCH1, SCRATCH1, SCRATCH2, CC_LE); + SetJumpTarget(skipPositive); + // Now, whether negative or positive, move to the result. + fp.FMOV(fpr.V(tempregs[i]), SCRATCH1); + SetJumpTarget(skip); break; } case 3: // vmax { + fp.FCMP(fpr.V(sregs[i]), fpr.V(tregs[i])); + FixupBranch unordered = B(CC_VS); fp.FMAX(fpr.V(tempregs[i]), fpr.V(sregs[i]), fpr.V(tregs[i])); + FixupBranch skip = B(); + + SetJumpTarget(unordered); + // Move to integer registers, it'll be easier. Or maybe there's a simd way? 
+ fp.FMOV(SCRATCH1, fpr.V(sregs[i])); + fp.FMOV(SCRATCH2, fpr.V(sregs[i])); + // And together to find if both have negative set. + TST(SCRATCH1, SCRATCH2); + FixupBranch cmpPositive = B(CC_PL); + // If both are negative, "max" is the least of the two, since it has the lowest mantissa. + CMP(SCRATCH1, SCRATCH2); + CSEL(SCRATCH1, SCRATCH1, SCRATCH2, CC_LE); + FixupBranch skipPositive = B(); + // If either one is positive, we just want the highest one. + SetJumpTarget(cmpPositive); + CMP(SCRATCH1, SCRATCH2); + CSEL(SCRATCH1, SCRATCH1, SCRATCH2, CC_GE); + SetJumpTarget(skipPositive); + // Now, whether negative or positive, move to the result. + fp.FMOV(fpr.V(tempregs[i]), SCRATCH1); + SetJumpTarget(skip); break; } case 6: // vsge From 3301a347b3811d131c379e8457d3190df9ed31cd Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sat, 12 Sep 2015 11:56:33 -0700 Subject: [PATCH 3/3] arm64: Fix vmin/vmax typo: load tregs, not sregs, into SCRATCH2. --- Core/MIPS/ARM64/Arm64CompVFPU.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Core/MIPS/ARM64/Arm64CompVFPU.cpp b/Core/MIPS/ARM64/Arm64CompVFPU.cpp index ebe5f692a8..dec8164b89 100644 --- a/Core/MIPS/ARM64/Arm64CompVFPU.cpp +++ b/Core/MIPS/ARM64/Arm64CompVFPU.cpp @@ -662,7 +662,7 @@ namespace MIPSComp { SetJumpTarget(unordered); // Move to integer registers, it'll be easier. Or maybe there's a simd way? fp.FMOV(SCRATCH1, fpr.V(sregs[i])); - fp.FMOV(SCRATCH2, fpr.V(sregs[i])); + fp.FMOV(SCRATCH2, fpr.V(tregs[i])); // And together to find if both have negative set. TST(SCRATCH1, SCRATCH2); FixupBranch cmpPositive = B(CC_PL); @@ -690,7 +690,7 @@ namespace MIPSComp { SetJumpTarget(unordered); // Move to integer registers, it'll be easier. Or maybe there's a simd way? fp.FMOV(SCRATCH1, fpr.V(sregs[i])); - fp.FMOV(SCRATCH2, fpr.V(sregs[i])); + fp.FMOV(SCRATCH2, fpr.V(tregs[i])); // And together to find if both have negative set. TST(SCRATCH1, SCRATCH2); FixupBranch cmpPositive = B(CC_PL);