Merge pull request #7956 from unknownbrackets/arm64-minor

Unknown's min/max unordered-float fix for ARM64
2024-11-23 21:39:52 +00:00 · 2015-09-12 11:59:05 -07:00 · 2015-09-12 11:59:05 -07:00 · 0c1a5324fb
commit 0c1a5324fb
parent 1e109a3b27 3301a347b3
3 changed files with 52 additions and 0 deletions
--- a/Common/Arm64Emitter.cpp
+++ b/Common/Arm64Emitter.cpp
@@ -1410,6 +1410,10 @@ void ARM64XEmitter::BICS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shif
 {
 	EncodeLogicalInst(7, Rd, Rn, Rm, Shift);
 }
+void ARM64XEmitter::TST(ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift) {
+	const ARM64Reg discard = Is64Bit(Rn) ? ZR : WZR;  // Zero register of Rn's width: the AND result is dropped, only the flags matter.
+	ANDS(discard, Rn, Rm, Shift);
+}

 void ARM64XEmitter::MOV(ARM64Reg Rd, ARM64Reg Rm, ArithOption Shift) {
 	ORR(Rd, Is64Bit(Rd) ? ZR : WZR, Rm, Shift);
--- a/Common/Arm64Emitter.h
+++ b/Common/Arm64Emitter.h
@@ -547,6 +547,7 @@ public:
 	void EON(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
 	void ANDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
 	void BICS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
+	void TST(ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);  // Test bits: ANDS of Rn and (shifted) Rm with the result sent to ZR/WZR.

 	// Wrap the above for saner syntax
 	void AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { AND(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
@@ -557,6 +558,7 @@ public:
 	void EON(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { EON(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
 	void ANDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { ANDS(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
 	void BICS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { BICS(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
+	void TST(ARM64Reg Rn, ARM64Reg Rm) { TST(Rn, Rm, ArithOption(Is64Bit(Rn) ? ZR : WZR, ST_LSL, 0)); }  // Unshifted form (LSL #0); there is no Rd, so the option is sized from Rn's width.

 	// Convenience wrappers around ORR. These match the official convenience syntax.
 	void MOV(ARM64Reg Rd, ARM64Reg Rm, ArithOption Shift);
--- a/Core/MIPS/ARM64/Arm64CompVFPU.cpp
+++ b/Core/MIPS/ARM64/Arm64CompVFPU.cpp
@@ -654,12 +654,58 @@ namespace MIPSComp {
 				switch ((op >> 23) & 7) {
 				case 2:  // vmin
 				{
+					fp.FCMP(fpr.V(sregs[i]), fpr.V(tregs[i]));  // vmin: plain FMIN unless FCMP reports the pair as unordered (V set).
+					FixupBranch unordered = B(CC_VS);
 					fp.FMIN(fpr.V(tempregs[i]), fpr.V(sregs[i]), fpr.V(tregs[i]));
+					FixupBranch skip = B();  // Ordered case is finished; jump over the fallback below.
+
+					SetJumpTarget(unordered);
+					// Unordered fallback: compare the raw bits in integer registers, it'll be easier.  Or maybe there's a simd way?
+					fp.FMOV(SCRATCH1, fpr.V(sregs[i]));
+					fp.FMOV(SCRATCH2, fpr.V(tregs[i]));
+					// AND them together (flags only): the result is negative only when both have the sign bit set.
+					TST(SCRATCH1, SCRATCH2);
+					FixupBranch cmpPositive = B(CC_PL);
+					// If both are negative, "min" is the greater of the two integer values, since that one has the larger magnitude.
+					CMP(SCRATCH1, SCRATCH2);
+					CSEL(SCRATCH1, SCRATCH1, SCRATCH2, CC_GE);
+					FixupBranch skipPositive = B();
+					// If either one is positive, we just want the lowest one (as a signed integer).
+					SetJumpTarget(cmpPositive);
+					CMP(SCRATCH1, SCRATCH2);
+					CSEL(SCRATCH1, SCRATCH1, SCRATCH2, CC_LE);
+					SetJumpTarget(skipPositive);
+					// Now, whether negative or positive, move the selected bits to the result register.
+					fp.FMOV(fpr.V(tempregs[i]), SCRATCH1);
+					SetJumpTarget(skip);
 					break;
 				}
 				case 3:  // vmax
 				{
+					fp.FCMP(fpr.V(sregs[i]), fpr.V(tregs[i]));  // vmax: plain FMAX unless FCMP reports the pair as unordered (V set).
+					FixupBranch unordered = B(CC_VS);
 					fp.FMAX(fpr.V(tempregs[i]), fpr.V(sregs[i]), fpr.V(tregs[i]));
+					FixupBranch skip = B();  // Ordered case is finished; jump over the fallback below.
+
+					SetJumpTarget(unordered);
+					// Unordered fallback: compare the raw bits in integer registers, it'll be easier.  Or maybe there's a simd way?
+					fp.FMOV(SCRATCH1, fpr.V(sregs[i]));
+					fp.FMOV(SCRATCH2, fpr.V(tregs[i]));
+					// AND them together (flags only): the result is negative only when both have the sign bit set.
+					TST(SCRATCH1, SCRATCH2);
+					FixupBranch cmpPositive = B(CC_PL);
+					// If both are negative, "max" is the least of the two integer values, since that one has the smaller magnitude.
+					CMP(SCRATCH1, SCRATCH2);
+					CSEL(SCRATCH1, SCRATCH1, SCRATCH2, CC_LE);
+					FixupBranch skipPositive = B();
+					// If either one is positive, we just want the highest one (as a signed integer).
+					SetJumpTarget(cmpPositive);
+					CMP(SCRATCH1, SCRATCH2);
+					CSEL(SCRATCH1, SCRATCH1, SCRATCH2, CC_GE);
+					SetJumpTarget(skipPositive);
+					// Now, whether negative or positive, move the selected bits to the result register.
+					fp.FMOV(fpr.V(tempregs[i]), SCRATCH1);
+					SetJumpTarget(skip);
 					break;
 				}
 				case 6:  // vsge