Partial fixes for dp*, DST

2024-11-26 19:20:22 +00:00 · 2022-06-23 19:00:06 -07:00 · 2022-06-23 19:00:06 -07:00 · 39ef5bd4a6
commit 39ef5bd4a6
parent d757d5c672
2 changed files with 67 additions and 9 deletions
--- a/src/nv2a_vsh_cpu.c
+++ b/src/nv2a_vsh_cpu.c
@@ -56,9 +56,9 @@ void nv2a_vsh_cpu_mad(float *out, const float *inputs) {
 }

 void nv2a_vsh_cpu_dp3(float *out, const float *inputs) {
-  float result = COMP(inputs, 0, _X) * COMP(inputs, 1, _X) +
-                 COMP(inputs, 0, _Y) * COMP(inputs, 1, _Y) +
-                 COMP(inputs, 0, _Z) * COMP(inputs, 1, _Z);
+  float result = fix_inf_mult(COMP(inputs, 0, _X), COMP(inputs, 1, _X)) +
+                 fix_inf_mult(COMP(inputs, 0, _Y), COMP(inputs, 1, _Y)) +
+                 fix_inf_mult(COMP(inputs, 0, _Z), COMP(inputs, 1, _Z));
  out[0] = result;
  out[1] = result;
  out[2] = result;
@@ -66,9 +66,9 @@ void nv2a_vsh_cpu_dp3(float *out, const float *inputs) {
 }

 void nv2a_vsh_cpu_dph(float *out, const float *inputs) {
-  float result = COMP(inputs, 0, _X) * COMP(inputs, 1, _X) +
-                 COMP(inputs, 0, _Y) * COMP(inputs, 1, _Y) +
-                 COMP(inputs, 0, _Z) * COMP(inputs, 1, _Z) + COMP(inputs, 1, _W);
+  float result = fix_inf_mult(COMP(inputs, 0, _X), COMP(inputs, 1, _X)) +
+                 fix_inf_mult(COMP(inputs, 0, _Y), COMP(inputs, 1, _Y)) +
+                 fix_inf_mult(COMP(inputs, 0, _Z), COMP(inputs, 1, _Z)) + COMP(inputs, 1, _W);
  out[0] = result;
  out[1] = result;
  out[2] = result;
@@ -77,8 +77,10 @@ void nv2a_vsh_cpu_dph(float *out, const float *inputs) {

 void nv2a_vsh_cpu_dp4(float *out, const float *inputs) {
  float result =
-      COMP(inputs, 0, _X) * COMP(inputs, 1, _X) + COMP(inputs, 0, _Y) * COMP(inputs, 1, _Y) +
-      COMP(inputs, 0, _Z) * COMP(inputs, 1, _Z) + COMP(inputs, 0, _W) * COMP(inputs, 1, _W);
+      fix_inf_mult(COMP(inputs, 0, _X), COMP(inputs, 1, _X)) +
+      fix_inf_mult(COMP(inputs, 0, _Y), COMP(inputs, 1, _Y)) +
+      fix_inf_mult(COMP(inputs, 0, _Z), COMP(inputs, 1, _Z)) +
+      fix_inf_mult(COMP(inputs, 0, _W), COMP(inputs, 1, _W));
  out[0] = result;
  out[1] = result;
  out[2] = result;
@@ -87,7 +89,7 @@ void nv2a_vsh_cpu_dp4(float *out, const float *inputs) {

 void nv2a_vsh_cpu_dst(float *out, const float *inputs) {
  out[0] = 1.0f;
-  out[1] = COMP(inputs, 0, _Y) * COMP(inputs, 1, _Y);
+  out[1] = fix_inf_mult(COMP(inputs, 0, _Y), COMP(inputs, 1, _Y));
  out[2] = COMP(inputs, 0, _Z);
  out[3] = COMP(inputs, 1, _W);
 }
--- a/test/operations/test_basic.cpp
+++ b/test/operations/test_basic.cpp
@@ -1,4 +1,5 @@
 #include <boost/test/unit_test.hpp>
+#include <cmath>

 #include "nv2a_vsh_cpu.h"

@@ -63,4 +64,59 @@ BOOST_AUTO_TEST_CASE(add_trivial) {
  BOOST_TEST(out[2] == 104.0f);
  BOOST_TEST(out[3] == -36.0f);
 }
+
+//BOOST_AUTO_TEST_CASE(dp3_trivial) {
+//  float inputs[][8] = {
+//      {-8.901235e+25f, 6.432100e-15f, 5.864211e+16f, 1.844675e+19f, 1.844675e+19f, -6.432100e-15f, 1.234568e+20f, -0.123457f}
+//  };
+//  float results[][4] = {
+//      {-3.330426e+38f, -3.330426e+38f, -3.330426e+38f, -3.330426e+38f},
+//  };
+//
+//  for (uint32_t i = 0; i < sizeof(inputs) / sizeof(inputs[0]); ++i) {
+//    float *in = inputs[i];
+//    float *expected = results[i];
+//    BOOST_TEST_INFO(i);
+//    float out[4];
+//    nv2a_vsh_cpu_dp3(out, in);
+//
+//    BOOST_TEST(out[0] == expected[0]);
+//    BOOST_TEST(out[1] == expected[1]);
+//    BOOST_TEST(out[2] == expected[2]);
+//    BOOST_TEST(out[3] == expected[3]);
+//
+//  }
+//}
+//
+//BOOST_AUTO_TEST_CASE(log_trivial) {
+//  float inputs[][4] = {
+//      {-5.864211e16f, 0.0f, 0.0f, 0.0f},
+//      {0.0f, 0.0f, 0.0f, 0.0f},
+//      {-0.0f, 0.0f, 0.0f, 0.0f},
+//      {INFINITY, 0.0f, 0.0f, 0.0f},
+//      {-INFINITY, 0.0f, 0.0f, 0.0f},
+//  };
+//  float results[][4] = {
+//      {55.0f, 1.62765f, 55.7028f, 1.0f},
+//      {-INFINITY, 1.0f, -INFINITY, 1.0f},
+//      {-INFINITY, 1.0f, -INFINITY, 1.0f},
+//      {INFINITY, 1.0f, INFINITY, 1.0f},
+//      {INFINITY, 1.0f, INFINITY, 1.0f},
+//  };
+//
+//  for (uint32_t i = 0; i < sizeof(inputs) / sizeof(inputs[0]); ++i) {
+//    float *in = inputs[i];
+//    float *expected = results[i];
+//    BOOST_TEST_INFO(i);
+//    float out[4];
+//    nv2a_vsh_cpu_log(out, in);
+//
+//    BOOST_TEST(out[0] == expected[0]);
+//    BOOST_TEST(out[1] == expected[1]);
+//    BOOST_TEST(out[2] == expected[2]);
+//    BOOST_TEST(out[3] == expected[3]);
+//
+//  }
+//}
+
 BOOST_AUTO_TEST_SUITE_END()