All GTE basic tests pass now - simias - try backporting the PGXP

patches to the non-GTE_SPEEDHACKS paths - you can decide for yourself whether or not to kill the GTE_SPEEDHACKS code after this
2025-02-20 00:42:13 +00:00 · 2018-02-21 17:16:29 +01:00 · 2018-02-21 17:16:29 +01:00 · 0d38a5de1e
commit 0d38a5de1e
parent bddafd31ea
1 changed files with 318 additions and 20 deletions
--- a/mednafen/psx/gte.cpp
+++ b/mednafen/psx/gte.cpp
@ -29,6 +29,10 @@

 extern bool psx_gte_overclock;

+#if 0
+#define GTE_SPEEDHACKS
+#endif
+
 #include "../clamp.h"

 /* Notes:
@ -719,6 +723,36 @@ uint32_t GTE_ReadDR(unsigned int which)
   return(ret);
 }

+#define sign_x_to_s64(_bits, _value) (((int64_t)((uint64_t)(_value) << (64 - _bits))) >> (64 - _bits))
+
+static INLINE int64_t A_MV(unsigned which, int64_t value)
+{
+   if(value >= (INT64_C(1) << 43))
+      FLAGS |= 1 << (30 - which);
+
+   if(value < -(INT64_C(1) << 43))
+      FLAGS |= 1 << (27 - which);
+
+   return sign_x_to_s64(44, value);
+}
+
+static INLINE int64_t F(int64_t value)
+{
+   if(value < -2147483648LL)
+   {
+      // flag set here
+      FLAGS |= 1 << 15;
+   }
+
+   if(value > 2147483647LL)
+   {
+      // flag set here
+      FLAGS |= 1 << 16;
+   }
+   return(value);
+}
+
+
 /* Truncate i64 value to only keep the low 43 bits + sign and
 * update the flags if an overflow occurs */
 static INLINE int64_t i64_to_i44(unsigned which, int64_t value)
@ -773,6 +807,109 @@ static INLINE int16_t Lm_B_PTZ(unsigned int which, int32_t value, int32_t ftv_va
   return(value);
 }

+static INLINE uint8_t Lm_C(unsigned int which, int32_t value)
+{
+   if(value & ~0xFF)
+   {
+      // Set flag here
+      FLAGS |= 1 << (21 - which);	// Tested with GPF
+
+      if(value < 0)
+         value = 0;
+
+      if(value > 255)
+         value = 255;
+   }
+
+   return(value);
+}
+
+static INLINE int32_t Lm_D(int32_t value, int unchained)
+{
+   // Not sure if we should have it as int64, or just chain on to and special case when the F flags are set.
+   if(!unchained)
+   {
+      if(FLAGS & (1 << 15))
+      {
+         FLAGS |= 1 << 18;
+         return(0);
+      }
+
+      if(FLAGS & (1 << 16))
+      {
+         FLAGS |= 1 << 18;
+         return(0xFFFF);
+      }
+   }
+
+   if(value < 0)
+   {
+      // Set flag here
+      value = 0;
+      FLAGS |= 1 << 18;	// Tested with AVSZ3
+   }
+   else if(value > 65535)
+   {
+      // Set flag here.
+      value = 65535;
+      FLAGS |= 1 << 18;	// Tested with AVSZ3
+   }
+
+   return(value);
+}
+
+static INLINE int32_t Lm_G(unsigned int which, int32_t value)
+{
+   if(value < -1024)
+   {
+      // Set flag here
+      value = -1024;
+      FLAGS |= 1 << (14 - which);
+   }
+
+   if(value > 1023)
+   {
+      // Set flag here.
+      value = 1023;
+      FLAGS |= 1 << (14 - which);
+   }
+
+   return(value);
+}
+
+// limit to 4096, not 4095
+static INLINE int32_t Lm_H(int32_t value)
+{
+#if 0
+   if(FLAGS & (1 << 15))
+   {
+      value = 0;
+      FLAGS |= 1 << 12;
+      return value;
+   }
+
+   if(FLAGS & (1 << 16))
+   {
+      value = 4096;
+      FLAGS |= 1 << 12;
+      return value;
+   }
+#endif
+
+   if(value < 0)
+   {
+      value = 0;
+      FLAGS |= 1 << 12;
+   }
+
+   if(value > 4096)
+   {
+      value = 4096;
+      FLAGS |= 1 << 12;
+   }
+
+   return(value);
+}

 /* Convert a 64bit signed average value to an unsigned halfword
 * while updating the overflow flags */
@ -829,24 +966,6 @@ static INLINE int32_t i32_to_i11_saturate(uint8_t flag, int32_t value)
   return value;
 }

-// limit to 4096, not 4095
-static INLINE int32_t Lm_H(int32_t depth)
-{
-   if(depth < 0)
-   {
-      FLAGS |= 1 << 12;
-      return 0;
-   }
-
-   if(depth > 4096)
-   {
-      FLAGS |= 1 << 12;
-      return 4096;
-   }
-
-   return depth;
-}
-
 static INLINE uint8_t MAC_to_COLOR(uint8_t flag, int32_t mac)
 {
   int32_t c = mac >> 4;
@ -868,24 +987,57 @@ static INLINE void MAC_to_RGB_FIFO(void)
 {
   RGB_FIFO[0] = RGB_FIFO[1];
   RGB_FIFO[1] = RGB_FIFO[2];
+#ifdef GTE_SPEEDHACKS
   RGB_FIFO[2].R = MAC_to_COLOR(0, MAC[1]);
   RGB_FIFO[2].G = MAC_to_COLOR(1, MAC[2]);
   RGB_FIFO[2].B = MAC_to_COLOR(2, MAC[3]);
+#else
+   RGB_FIFO[2].R = Lm_C(0, MAC[1] >> 4);
+   RGB_FIFO[2].G = Lm_C(1, MAC[2] >> 4);
+   RGB_FIFO[2].B = Lm_C(2, MAC[3] >> 4);
+#endif
   RGB_FIFO[2].CD = RGB.CD;
 }

+static INLINE int16_t Lm_B(unsigned int which, int32_t value, int lm)
+{
+   int32_t tmp = lm << 15;
+
+   if(value < (-32768 + tmp))
+   {
+      // set flag here
+      FLAGS |= 1 << (24 - which);
+      value = -32768 + tmp;
+   }
+
+   if(value > 32767)
+   {
+      // Set flag here
+      FLAGS |= 1 << (24 - which);
+      value = 32767;
+   }
+
+   return(value);
+}

 static INLINE void MAC_to_IR(int lm)
 {
+#ifdef GTE_SPEEDHACKS
   IR1 = i32_to_i16_saturate(0, MAC[1], lm);
   IR2 = i32_to_i16_saturate(1, MAC[2], lm);
   IR3 = i32_to_i16_saturate(2, MAC[3], lm);
+#else
+   IR1 = Lm_B(0, MAC[1], lm);
+   IR2 = Lm_B(1, MAC[2], lm);
+   IR3 = Lm_B(2, MAC[3], lm);
+#endif
 }

 static INLINE void MultiplyMatrixByVector(const gtematrix *matrix, const int16_t *v, const int32_t *crv, uint32_t sf, int lm)
 {
   unsigned i;

+#ifdef GTE_SPEEDHACKS
   if(MDFN_LIKELY(matrix != &Matrices.AbbyNormal))
   {
      if(crv == CRVectors.FC)
@ -979,22 +1131,109 @@ static INLINE void MultiplyMatrixByVector(const gtematrix *matrix, const int16_t
         }
      }
   }
+#else
+   for(i = 0; i < 3; i++)
+   {
+      int64_t tmp;
+      int32_t mulr[3];
+
+      tmp = (uint64_t)(int64_t)crv[i] << 12;
+
+      if(matrix == &Matrices.AbbyNormal)
+      {
+         if(i == 0)
+         {
+            mulr[0] = -(RGB.R << 4);
+            mulr[1] = (RGB.R << 4);
+            mulr[2] = IR0;
+         }
+         else
+         {
+            mulr[0] = (int16_t)CR[i];
+            mulr[1] = (int16_t)CR[i];
+            mulr[2] = (int16_t)CR[i];
+         }
+      }
+      else
+      {
+         mulr[0] = matrix->MX[i][0];
+         mulr[1] = matrix->MX[i][1];
+         mulr[2] = matrix->MX[i][2];
+      }
+      mulr[0] *= v[0];
+      mulr[1] *= v[1];
+      mulr[2] *= v[2];
+
+      tmp = A_MV(i, tmp + mulr[0]);
+      if(crv == CRVectors.FC)
+      {
+         Lm_B(i, tmp >> sf, FALSE);
+         tmp = 0;
+      }
+
+      tmp = A_MV(i, tmp + mulr[1]);
+      tmp = A_MV(i, tmp + mulr[2]);
+
+      MAC[1 + i] = tmp >> sf;
+   }
+#endif

   MAC_to_IR(lm);
 }

+static INLINE void MultiplyMatrixByVector_PT(const gtematrix *matrix, const int16_t *v, const int32_t *crv, uint32_t sf, int lm)
+{
+   int64_t tmp[3];
+   unsigned i;
+
+   for(i = 0; i < 3; i++)
+   {
+      int32_t mulr[3];
+
+      tmp[i] = (uint64_t)(int64_t)crv[i] << 12;
+
+      mulr[0] = matrix->MX[i][0] * v[0];
+      mulr[1] = matrix->MX[i][1] * v[1];
+      mulr[2] = matrix->MX[i][2] * v[2];
+
+      tmp[i] = A_MV(i, tmp[i] + mulr[0]);
+      tmp[i] = A_MV(i, tmp[i] + mulr[1]);
+      tmp[i] = A_MV(i, tmp[i] + mulr[2]);
+
+      MAC[1 + i] = tmp[i] >> sf;
+   }
+
+   IR1 = Lm_B(0, MAC[1], lm);
+   IR2 = Lm_B(1, MAC[2], lm);
+   //printf("FTV: %08x %08x\n", crv[2], (uint32)(tmp[2] >> 12));
+   IR3 = Lm_B_PTZ(2, MAC[3], tmp[2] >> 12, lm);
+
+   Z_FIFO[0] = Z_FIFO[1];
+   Z_FIFO[1] = Z_FIFO[2];
+   Z_FIFO[2] = Z_FIFO[3];
+   Z_FIFO[3] = Lm_D(tmp[2] >> 12, TRUE);
+}
+
 /* SQR - Square Vector */
 static int32_t SQR(uint32_t instr)
 {
-   unsigned i;
   const uint32_t sf = (instr & (1 << 19)) ? 12 : 0;
   const int      lm = (instr >> 10) & 1;

+#ifdef GTE_SPEEDHACKS
+   /* PSX GTE test fails with this code */
+   unsigned i;
+
   for (i = 0; i < 4; i++)
   {
      int32_t ir = IR[i];
      MAC[i]     = (ir * ir) >> sf;
   }
+#else
+   MAC[1] = ((IR1 * IR1) >> sf);
+   MAC[2] = ((IR2 * IR2) >> sf);
+   MAC[3] = ((IR3 * IR3) >> sf);
+#endif

   MAC_to_IR(lm);

@ -1060,6 +1299,7 @@ static INLINE void check_mac_overflow(int64_t value)
      FLAGS |= 1 << 16;
 }

+#ifdef GTE_SPEEDHACKS
 static INLINE void TransformXY(int64_t h_div_sz, float precise_h_div_sz, uint16 z)
 {
   float fofx       = ((float)OFX / (float)(1 << 16));
@ -1094,6 +1334,20 @@ static INLINE void TransformXY(int64_t h_div_sz, float precise_h_div_sz, uint16
   uint32 value = *((uint32*)&XY_FIFO[3]);
   PGXP_pushSXYZ2f(precise_x, precise_y, (float)z, value);
 }
+#else
+static INLINE void TransformXY(int64_t h_div_sz)
+{
+   MAC[0] = F((int64_t)OFX + IR1 * h_div_sz * ((widescreen_hack) ? 0.75 : 1.00)) >> 16;
+   XY_FIFO[3].X = Lm_G(0, MAC[0]);
+
+   MAC[0] = F((int64_t)OFY + IR2 * h_div_sz) >> 16;
+   XY_FIFO[3].Y = Lm_G(1, MAC[0]);
+
+   XY_FIFO[0] = XY_FIFO[1];
+   XY_FIFO[1] = XY_FIFO[2];
+   XY_FIFO[2] = XY_FIFO[3];
+}
+#endif

 /* Perform depth queuing calculations using the projection
 * factor computed by the 'RTP' command */
@ -1114,6 +1368,13 @@ static INLINE void depth_queuing(int64_t h_div_sz)
   IR0    = Lm_H(((int64_t)depth));
 }

+static INLINE void TransformDQ(int64_t h_div_sz)
+{
+   MAC[0] = F((int64_t)DQB + DQA * h_div_sz);
+   IR0 = Lm_H(((int64_t)DQB + DQA * h_div_sz) >> 12);
+}
+
+#ifdef GTE_SPEEDHACKS
 /* Rotate, Translate and Perspective transform a single vector
 * Returns the projection factor that's also used for depth 
 * queuing */
@ -1188,11 +1449,24 @@ static int64_t RTP(uint32_t instr, uint32_t vector_index)

   return projection_factor;
 }
+#endif

 static int32_t RTPS(uint32_t instr)
 {
+#ifdef GTE_SPEEDHACKS
   int64_t projection_factor = RTP(instr, 0);
   depth_queuing(projection_factor);
+#else
+   int64_t h_div_sz;
+   const uint32_t sf = (instr & (1 << 19)) ? 12 : 0;
+   const int      lm = (instr >> 10) & 1;
+
+   MultiplyMatrixByVector_PT(&Matrices.Rot, Vectors[0], CRVectors.T, sf, lm);
+   h_div_sz = Divide(H, Z_FIFO[3]);
+
+   TransformXY(h_div_sz);
+   TransformDQ(h_div_sz);
+#endif

   return(15);
 }
@ -1201,11 +1475,30 @@ static int32_t RTPS(uint32_t instr)
 * Operates on v0, v1 and v2 */
 static int32_t RTPT(uint32_t instr)
 {
+#ifdef GTE_SPEEDHACKS
   int64_t projection_factor;
   RTP(instr, 0);
   RTP(instr, 1);
   projection_factor = RTP(instr, 2);
   depth_queuing(projection_factor);
+#else
+   unsigned i;
+   const uint32_t sf = (instr & (1 << 19)) ? 12 : 0;
+   const int      lm = (instr >> 10) & 1;
+
+   for(i = 0; i < 3; i++)
+   {
+      int64_t h_div_sz;
+
+      MultiplyMatrixByVector_PT(&Matrices.Rot, Vectors[i], CRVectors.T, sf, lm);
+      h_div_sz = Divide(H, Z_FIFO[3]);
+
+      TransformXY(h_div_sz);
+
+      if(i == 2)
+         TransformDQ(h_div_sz);
+   }
+#endif

   return(23);
 }
@ -1468,10 +1761,11 @@ static int32_t CDP(uint32_t instr)
   return(13);
 }

-
 /* Normal Clipping */
 static int32_t NCLIP(uint32_t instr)
 {
+#ifdef GTE_SPEEDHACKS
+   /* PSX GTE test fails with this code */
   int16_t x0     = XY_FIFO[0].X;
   int16_t y0     = XY_FIFO[0].Y;
   int16_t x1     = XY_FIFO[1].X;
@ -1490,6 +1784,10 @@ static int32_t NCLIP(uint32_t instr)
   check_mac_overflow(sum);

   MAC[0] = sum;
+#else
+   MAC[0] = F( (int64_t)(XY_FIFO[0].X * (XY_FIFO[1].Y - XY_FIFO[2].Y)) + (XY_FIFO[1].X * (XY_FIFO[2].Y - XY_FIFO[0].Y)) + (XY_FIFO[2].X * (XY_FIFO[0].Y - XY_FIFO[1].Y))
+         );
+#endif

   return(8);
 }