Partial fixes for EXP, LOG, LIT.

This commit is contained in:
Erik Abair 2022-06-23 19:24:35 -07:00
parent 39ef5bd4a6
commit 2de2148592
3 changed files with 21 additions and 1 deletions

View File

@ -55,10 +55,20 @@ void nv2a_vsh_cpu_mad(float *out, const float *inputs) {
out[3] = fix_inf_mult(COMP(inputs, 0, _W), COMP(inputs, 1, _W)) + COMP(inputs, 2, _W);
}
/*
 * Clamps an infinite value to a finite one of the same sign.
 *
 * Any input for which isinf() is false (finite values and NaN) is returned
 * unchanged. For +/-infinity the bit pattern is rewritten to 0x7F7FFFFF or
 * 0xFF7FFFFF respectively, which on IEEE-754 binary32 is +/-FLT_MAX.
 */
static inline float fix_inf(float in) {
  if (!isinf(in)) {
    return in;
  }

  /* Pun through a union instead of casting pointers: reading a float object
   * through a uint32_t lvalue (and vice versa) breaks the effective-type
   * rule and is undefined behaviour. */
  union {
    float f;
    uint32_t u;
  } bits;
  bits.f = in;

  /* Keep the sign bit and the upper seven exponent bits, then set the low
   * exponent bit position and the whole mantissa by adding 0x7FFFFF. */
  bits.u = (bits.u & 0xFF000000u) + 0x007FFFFFu;
  return bits.f;
}
void nv2a_vsh_cpu_dp3(float *out, const float *inputs) {
float result = fix_inf_mult(COMP(inputs, 0, _X), COMP(inputs, 1, _X)) +
fix_inf_mult(COMP(inputs, 0, _Y), COMP(inputs, 1, _Y)) +
fix_inf_mult(COMP(inputs, 0, _Z), COMP(inputs, 1, _Z));
result = fix_inf(result);
out[0] = result;
out[1] = result;
out[2] = result;
@ -69,6 +79,7 @@ void nv2a_vsh_cpu_dph(float *out, const float *inputs) {
float result = fix_inf_mult(COMP(inputs, 0, _X), COMP(inputs, 1, _X)) +
fix_inf_mult(COMP(inputs, 0, _Y), COMP(inputs, 1, _Y)) +
fix_inf_mult(COMP(inputs, 0, _Z), COMP(inputs, 1, _Z)) + COMP(inputs, 1, _W);
result = fix_inf(result);
out[0] = result;
out[1] = result;
out[2] = result;
@ -81,6 +92,7 @@ void nv2a_vsh_cpu_dp4(float *out, const float *inputs) {
fix_inf_mult(COMP(inputs, 0, _Y), COMP(inputs, 1, _Y)) +
fix_inf_mult(COMP(inputs, 0, _Z), COMP(inputs, 1, _Z)) +
fix_inf_mult(COMP(inputs, 0, _W), COMP(inputs, 1, _W));
result = fix_inf(result);
out[0] = result;
out[1] = result;
out[2] = result;

View File

@ -24,7 +24,11 @@ void nv2a_vsh_cpu_sge(float *out, const float *inputs);
void nv2a_vsh_cpu_rcp(float *out, const float *inputs);
void nv2a_vsh_cpu_rcc(float *out, const float *inputs);
void nv2a_vsh_cpu_rsq(float *out, const float *inputs);
// WARNING: Negative inputs are not valid on hardware and are silently processed
// here.
void nv2a_vsh_cpu_exp(float *out, const float *inputs);
void nv2a_vsh_cpu_log(float *out, const float *inputs);
void nv2a_vsh_cpu_lit(float *out, const float *inputs);

View File

@ -66,10 +66,14 @@ BOOST_AUTO_TEST_CASE(add_trivial) {
}
//BOOST_AUTO_TEST_CASE(dp3_trivial) {
//
// float inputs[][8] = {
// {0.123457f, -0.000423457f, -8.901235e+25f, -323457.0f, -6.243211e+15f,
// -8.901235e+25f, 0.000423457f, -6.243211e+15f},
// {-8.901235e+25f, 6.432100e-15f, 5.864211e+16f, 1.844675e+19f, 1.844675e+19f, -6.432100e-15f, 1.234568e+20f, -0.123457f}
// };
// float results[][4] = {
// {-7.036874418e14f,-7.036874418e14f,-7.036874418e14f,-7.036874418e14f},
// {-3.330426e+38f, -3.330426e+38f, -3.330426e+38f, -3.330426e+38f},
// };
//
@ -87,7 +91,7 @@ BOOST_AUTO_TEST_CASE(add_trivial) {
//
// }
//}
//
//BOOST_AUTO_TEST_CASE(log_trivial) {
// float inputs[][4] = {
// {-5.864211e16f, 0.0f, 0.0f, 0.0f},