mirror of
https://github.com/xemu-project/nv2a_vsh_cpu.git
synced 2024-11-23 09:49:39 +00:00
Partial fixes for EXP, LOG, LIT.
This commit is contained in:
parent
39ef5bd4a6
commit
2de2148592
@ -55,10 +55,20 @@ void nv2a_vsh_cpu_mad(float *out, const float *inputs) {
|
|||||||
out[3] = fix_inf_mult(COMP(inputs, 0, _W), COMP(inputs, 1, _W)) + COMP(inputs, 2, _W);
|
out[3] = fix_inf_mult(COMP(inputs, 0, _W), COMP(inputs, 1, _W)) + COMP(inputs, 2, _W);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Clamps an infinite value to the largest-magnitude finite float of the same
 * sign.
 *
 * Values for which isinf() is false (finite values and NaN) are returned
 * unchanged. +Inf becomes the bit pattern 0x7F7FFFFF and -Inf becomes
 * 0xFF7FFFFF.
 */
static inline float fix_inf(float in) {
  if (!isinf(in)) {
    return in;
  }

  // Reinterpret the bits through a union: reading a float object through a
  // uint32_t pointer (and vice versa) violates strict aliasing and is
  // undefined behaviour.
  union {
    float f;
    uint32_t u;
  } bits;
  bits.f = in;

  // Keep the sign and the top seven exponent bits (clearing the lowest
  // exponent bit), then fill the mantissa with ones.
  bits.u = (bits.u & 0xFF000000) + 0x7FFFFF;
  return bits.f;
}
|
||||||
|
|
||||||
void nv2a_vsh_cpu_dp3(float *out, const float *inputs) {
|
void nv2a_vsh_cpu_dp3(float *out, const float *inputs) {
|
||||||
float result = fix_inf_mult(COMP(inputs, 0, _X), COMP(inputs, 1, _X)) +
|
float result = fix_inf_mult(COMP(inputs, 0, _X), COMP(inputs, 1, _X)) +
|
||||||
fix_inf_mult(COMP(inputs, 0, _Y), COMP(inputs, 1, _Y)) +
|
fix_inf_mult(COMP(inputs, 0, _Y), COMP(inputs, 1, _Y)) +
|
||||||
fix_inf_mult(COMP(inputs, 0, _Z), COMP(inputs, 1, _Z));
|
fix_inf_mult(COMP(inputs, 0, _Z), COMP(inputs, 1, _Z));
|
||||||
|
result = fix_inf(result);
|
||||||
out[0] = result;
|
out[0] = result;
|
||||||
out[1] = result;
|
out[1] = result;
|
||||||
out[2] = result;
|
out[2] = result;
|
||||||
@ -69,6 +79,7 @@ void nv2a_vsh_cpu_dph(float *out, const float *inputs) {
|
|||||||
float result = fix_inf_mult(COMP(inputs, 0, _X), COMP(inputs, 1, _X)) +
|
float result = fix_inf_mult(COMP(inputs, 0, _X), COMP(inputs, 1, _X)) +
|
||||||
fix_inf_mult(COMP(inputs, 0, _Y), COMP(inputs, 1, _Y)) +
|
fix_inf_mult(COMP(inputs, 0, _Y), COMP(inputs, 1, _Y)) +
|
||||||
fix_inf_mult(COMP(inputs, 0, _Z), COMP(inputs, 1, _Z)) + COMP(inputs, 1, _W);
|
fix_inf_mult(COMP(inputs, 0, _Z), COMP(inputs, 1, _Z)) + COMP(inputs, 1, _W);
|
||||||
|
result = fix_inf(result);
|
||||||
out[0] = result;
|
out[0] = result;
|
||||||
out[1] = result;
|
out[1] = result;
|
||||||
out[2] = result;
|
out[2] = result;
|
||||||
@ -81,6 +92,7 @@ void nv2a_vsh_cpu_dp4(float *out, const float *inputs) {
|
|||||||
fix_inf_mult(COMP(inputs, 0, _Y), COMP(inputs, 1, _Y)) +
|
fix_inf_mult(COMP(inputs, 0, _Y), COMP(inputs, 1, _Y)) +
|
||||||
fix_inf_mult(COMP(inputs, 0, _Z), COMP(inputs, 1, _Z)) +
|
fix_inf_mult(COMP(inputs, 0, _Z), COMP(inputs, 1, _Z)) +
|
||||||
fix_inf_mult(COMP(inputs, 0, _W), COMP(inputs, 1, _W));
|
fix_inf_mult(COMP(inputs, 0, _W), COMP(inputs, 1, _W));
|
||||||
|
result = fix_inf(result);
|
||||||
out[0] = result;
|
out[0] = result;
|
||||||
out[1] = result;
|
out[1] = result;
|
||||||
out[2] = result;
|
out[2] = result;
|
||||||
|
@ -24,7 +24,11 @@ void nv2a_vsh_cpu_sge(float *out, const float *inputs);
|
|||||||
void nv2a_vsh_cpu_rcp(float *out, const float *inputs);
|
void nv2a_vsh_cpu_rcp(float *out, const float *inputs);
|
||||||
void nv2a_vsh_cpu_rcc(float *out, const float *inputs);
|
void nv2a_vsh_cpu_rcc(float *out, const float *inputs);
|
||||||
void nv2a_vsh_cpu_rsq(float *out, const float *inputs);
|
void nv2a_vsh_cpu_rsq(float *out, const float *inputs);
|
||||||
|
|
||||||
|
// WARNING: Negative inputs are not valid on hardware and are silently processed
|
||||||
|
// here.
|
||||||
void nv2a_vsh_cpu_exp(float *out, const float *inputs);
|
void nv2a_vsh_cpu_exp(float *out, const float *inputs);
|
||||||
|
|
||||||
void nv2a_vsh_cpu_log(float *out, const float *inputs);
|
void nv2a_vsh_cpu_log(float *out, const float *inputs);
|
||||||
void nv2a_vsh_cpu_lit(float *out, const float *inputs);
|
void nv2a_vsh_cpu_lit(float *out, const float *inputs);
|
||||||
|
|
||||||
|
@ -66,10 +66,14 @@ BOOST_AUTO_TEST_CASE(add_trivial) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
//BOOST_AUTO_TEST_CASE(dp3_trivial) {
|
//BOOST_AUTO_TEST_CASE(dp3_trivial) {
|
||||||
|
//
|
||||||
// float inputs[][8] = {
|
// float inputs[][8] = {
|
||||||
|
// {0.123457f, -0.000423457f, -8.901235e+25f, -323457.0f, -6.243211e+15f,
|
||||||
|
// -8.901235e+25f, 0.000423457f, -6.243211e+15f},
|
||||||
// {-8.901235e+25f, 6.432100e-15f, 5.864211e+16f, 1.844675e+19f, 1.844675e+19f, -6.432100e-15f, 1.234568e+20f, -0.123457f}
|
// {-8.901235e+25f, 6.432100e-15f, 5.864211e+16f, 1.844675e+19f, 1.844675e+19f, -6.432100e-15f, 1.234568e+20f, -0.123457f}
|
||||||
// };
|
// };
|
||||||
// float results[][4] = {
|
// float results[][4] = {
|
||||||
|
// {-7.036874418e14f,-7.036874418e14f,-7.036874418e14f,-7.036874418e14f},
|
||||||
// {-3.330426e+38f, -3.330426e+38f, -3.330426e+38f, -3.330426e+38f},
|
// {-3.330426e+38f, -3.330426e+38f, -3.330426e+38f, -3.330426e+38f},
|
||||||
// };
|
// };
|
||||||
//
|
//
|
||||||
@ -87,7 +91,7 @@ BOOST_AUTO_TEST_CASE(add_trivial) {
|
|||||||
//
|
//
|
||||||
// }
|
// }
|
||||||
//}
|
//}
|
||||||
//
|
|
||||||
//BOOST_AUTO_TEST_CASE(log_trivial) {
|
//BOOST_AUTO_TEST_CASE(log_trivial) {
|
||||||
// float inputs[][4] = {
|
// float inputs[][4] = {
|
||||||
// {-5.864211e16f, 0.0f, 0.0f, 0.0f},
|
// {-5.864211e16f, 0.0f, 0.0f, 0.0f},
|
||||||
|
Loading…
Reference in New Issue
Block a user