mirror of
https://github.com/xemu-project/nv2a_vsh_cpu.git
synced 2024-11-23 01:39:38 +00:00
Switches to using a dispatch table for emulated instructions.
This commit is contained in:
parent
25a9fa6835
commit
21014eb5f2
@ -3,138 +3,139 @@
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
|
||||
void nv2a_vsh_cpu_mov(Nv2aVshRegister *out, const Nv2aVshRegister *a) {
|
||||
memcpy(out, a, sizeof(*out));
|
||||
void nv2a_vsh_cpu_mov(Nv2aVshRegister *out, const Nv2aVshRegister *inputs) {
|
||||
memcpy(out, inputs, sizeof(*out));
|
||||
}
|
||||
|
||||
void nv2a_vsh_cpu_arl(Nv2aVshRegister *out, const Nv2aVshRegister *a) {
|
||||
float val = floorf(a->reg.x + 0.001f);
|
||||
void nv2a_vsh_cpu_arl(Nv2aVshRegister *out, const Nv2aVshRegister *inputs) {
|
||||
float val = floorf(inputs->reg.x + 0.001f);
|
||||
out->reg.x = val;
|
||||
out->reg.y = val;
|
||||
out->reg.z = val;
|
||||
out->reg.w = val;
|
||||
}
|
||||
|
||||
void nv2a_vsh_cpu_mul(Nv2aVshRegister *out, const Nv2aVshRegister *a,
|
||||
const Nv2aVshRegister *b) {
|
||||
out->reg.x = a->reg.x * b->reg.x;
|
||||
out->reg.y = a->reg.y * b->reg.y;
|
||||
out->reg.z = a->reg.z * b->reg.z;
|
||||
out->reg.w = a->reg.w * b->reg.w;
|
||||
void nv2a_vsh_cpu_mul(Nv2aVshRegister *out, const Nv2aVshRegister *inputs) {
|
||||
out->reg.x = inputs[0].reg.x * inputs[1].reg.x;
|
||||
out->reg.y = inputs[0].reg.y * inputs[1].reg.y;
|
||||
out->reg.z = inputs[0].reg.z * inputs[1].reg.z;
|
||||
out->reg.w = inputs[0].reg.w * inputs[1].reg.w;
|
||||
}
|
||||
|
||||
void nv2a_vsh_cpu_add(Nv2aVshRegister *out, const Nv2aVshRegister *a,
|
||||
const Nv2aVshRegister *b) {
|
||||
out->reg.x = a->reg.x + b->reg.x;
|
||||
out->reg.y = a->reg.y + b->reg.y;
|
||||
out->reg.z = a->reg.z + b->reg.z;
|
||||
out->reg.w = a->reg.w + b->reg.w;
|
||||
void nv2a_vsh_cpu_add(Nv2aVshRegister *out, const Nv2aVshRegister *inputs) {
|
||||
out->reg.x = inputs[0].reg.x + inputs[1].reg.x;
|
||||
out->reg.y = inputs[0].reg.y + inputs[1].reg.y;
|
||||
out->reg.z = inputs[0].reg.z + inputs[1].reg.z;
|
||||
out->reg.w = inputs[0].reg.w + inputs[1].reg.w;
|
||||
}
|
||||
|
||||
void nv2a_vsh_cpu_mad(Nv2aVshRegister *out, const Nv2aVshRegister *a,
|
||||
const Nv2aVshRegister *b, const Nv2aVshRegister *c) {
|
||||
out->reg.x = a->reg.x * b->reg.x + c->reg.x;
|
||||
out->reg.y = a->reg.y * b->reg.y + c->reg.y;
|
||||
out->reg.z = a->reg.z * b->reg.z + c->reg.z;
|
||||
out->reg.w = a->reg.w * b->reg.w + c->reg.w;
|
||||
void nv2a_vsh_cpu_mad(Nv2aVshRegister *out, const Nv2aVshRegister *inputs) {
|
||||
out->reg.x = inputs[0].reg.x * inputs[1].reg.x + inputs[2].reg.x;
|
||||
out->reg.y = inputs[0].reg.y * inputs[1].reg.y + inputs[2].reg.y;
|
||||
out->reg.z = inputs[0].reg.z * inputs[1].reg.z + inputs[2].reg.z;
|
||||
out->reg.w = inputs[0].reg.w * inputs[1].reg.w + inputs[2].reg.w;
|
||||
}
|
||||
|
||||
void nv2a_vsh_cpu_dp3(Nv2aVshRegister *out, const Nv2aVshRegister *a,
|
||||
const Nv2aVshRegister *b) {
|
||||
void nv2a_vsh_cpu_dp3(Nv2aVshRegister *out, const Nv2aVshRegister *inputs) {
|
||||
float result = inputs[0].reg.x * inputs[1].reg.x +
|
||||
inputs[0].reg.y * inputs[1].reg.y +
|
||||
inputs[0].reg.z * inputs[1].reg.z;
|
||||
out->reg.x = result;
|
||||
out->reg.y = result;
|
||||
out->reg.z = result;
|
||||
out->reg.w = result;
|
||||
}
|
||||
|
||||
void nv2a_vsh_cpu_dph(Nv2aVshRegister *out, const Nv2aVshRegister *inputs) {
|
||||
float result = inputs[0].reg.x * inputs[1].reg.x +
|
||||
inputs[0].reg.y * inputs[1].reg.y +
|
||||
inputs[0].reg.z * inputs[1].reg.z + inputs[1].reg.w;
|
||||
out->reg.x = result;
|
||||
out->reg.y = result;
|
||||
out->reg.z = result;
|
||||
out->reg.w = result;
|
||||
}
|
||||
|
||||
void nv2a_vsh_cpu_dp4(Nv2aVshRegister *out, const Nv2aVshRegister *inputs) {
|
||||
float result =
|
||||
a->reg.x * b->reg.x + a->reg.y * b->reg.y + a->reg.z * b->reg.z;
|
||||
inputs[0].reg.x * inputs[1].reg.x + inputs[0].reg.y * inputs[1].reg.y +
|
||||
inputs[0].reg.z * inputs[1].reg.z + inputs[0].reg.w * inputs[1].reg.w;
|
||||
out->reg.x = result;
|
||||
out->reg.y = result;
|
||||
out->reg.z = result;
|
||||
out->reg.w = result;
|
||||
}
|
||||
|
||||
void nv2a_vsh_cpu_dph(Nv2aVshRegister *out, const Nv2aVshRegister *a,
|
||||
const Nv2aVshRegister *b) {
|
||||
float result = a->reg.x * b->reg.x + a->reg.y * b->reg.y +
|
||||
a->reg.z * b->reg.z + b->reg.w;
|
||||
out->reg.x = result;
|
||||
out->reg.y = result;
|
||||
out->reg.z = result;
|
||||
out->reg.w = result;
|
||||
}
|
||||
|
||||
void nv2a_vsh_cpu_dp4(Nv2aVshRegister *out, const Nv2aVshRegister *a,
|
||||
const Nv2aVshRegister *b) {
|
||||
float result = a->reg.x * b->reg.x + a->reg.y * b->reg.y +
|
||||
a->reg.z * b->reg.z + a->reg.w * b->reg.w;
|
||||
out->reg.x = result;
|
||||
out->reg.y = result;
|
||||
out->reg.z = result;
|
||||
out->reg.w = result;
|
||||
}
|
||||
|
||||
void nv2a_vsh_cpu_dst(Nv2aVshRegister *out, const Nv2aVshRegister *a,
|
||||
const Nv2aVshRegister *b) {
|
||||
void nv2a_vsh_cpu_dst(Nv2aVshRegister *out, const Nv2aVshRegister *inputs) {
|
||||
out->reg.x = 1.0f;
|
||||
out->reg.y = a->reg.y * b->reg.y;
|
||||
out->reg.z = a->reg.z;
|
||||
out->reg.w = b->reg.w;
|
||||
out->reg.y = inputs[0].reg.y * inputs[1].reg.y;
|
||||
out->reg.z = inputs[0].reg.z;
|
||||
out->reg.w = inputs[1].reg.w;
|
||||
}
|
||||
|
||||
void nv2a_vsh_cpu_min(Nv2aVshRegister *out, const Nv2aVshRegister *a,
|
||||
const Nv2aVshRegister *b) {
|
||||
out->reg.x = a->reg.x < b->reg.x ? a->reg.x : b->reg.x;
|
||||
out->reg.y = a->reg.y < b->reg.y ? a->reg.y : b->reg.y;
|
||||
out->reg.z = a->reg.z < b->reg.z ? a->reg.z : b->reg.z;
|
||||
out->reg.w = a->reg.w < b->reg.w ? a->reg.w : b->reg.w;
|
||||
void nv2a_vsh_cpu_min(Nv2aVshRegister *out, const Nv2aVshRegister *inputs) {
|
||||
out->reg.x =
|
||||
inputs[0].reg.x < inputs[1].reg.x ? inputs[0].reg.x : inputs[1].reg.x;
|
||||
out->reg.y =
|
||||
inputs[0].reg.y < inputs[1].reg.y ? inputs[0].reg.y : inputs[1].reg.y;
|
||||
out->reg.z =
|
||||
inputs[0].reg.z < inputs[1].reg.z ? inputs[0].reg.z : inputs[1].reg.z;
|
||||
out->reg.w =
|
||||
inputs[0].reg.w < inputs[1].reg.w ? inputs[0].reg.w : inputs[1].reg.w;
|
||||
}
|
||||
|
||||
void nv2a_vsh_cpu_max(Nv2aVshRegister *out, const Nv2aVshRegister *a,
|
||||
const Nv2aVshRegister *b) {
|
||||
out->reg.x = a->reg.x > b->reg.x ? a->reg.x : b->reg.x;
|
||||
out->reg.y = a->reg.y > b->reg.y ? a->reg.y : b->reg.y;
|
||||
out->reg.z = a->reg.z > b->reg.z ? a->reg.z : b->reg.z;
|
||||
out->reg.w = a->reg.w > b->reg.w ? a->reg.w : b->reg.w;
|
||||
void nv2a_vsh_cpu_max(Nv2aVshRegister *out, const Nv2aVshRegister *inputs) {
|
||||
out->reg.x =
|
||||
inputs[0].reg.x > inputs[1].reg.x ? inputs[0].reg.x : inputs[1].reg.x;
|
||||
out->reg.y =
|
||||
inputs[0].reg.y > inputs[1].reg.y ? inputs[0].reg.y : inputs[1].reg.y;
|
||||
out->reg.z =
|
||||
inputs[0].reg.z > inputs[1].reg.z ? inputs[0].reg.z : inputs[1].reg.z;
|
||||
out->reg.w =
|
||||
inputs[0].reg.w > inputs[1].reg.w ? inputs[0].reg.w : inputs[1].reg.w;
|
||||
}
|
||||
|
||||
void nv2a_vsh_cpu_slt(Nv2aVshRegister *out, const Nv2aVshRegister *a,
|
||||
const Nv2aVshRegister *b) {
|
||||
out->reg.x = a->reg.x < b->reg.x ? 1.0f : 0.0f;
|
||||
out->reg.y = a->reg.y < b->reg.y ? 1.0f : 0.0f;
|
||||
out->reg.z = a->reg.z < b->reg.z ? 1.0f : 0.0f;
|
||||
out->reg.w = a->reg.w < b->reg.w ? 1.0f : 0.0f;
|
||||
void nv2a_vsh_cpu_slt(Nv2aVshRegister *out, const Nv2aVshRegister *inputs) {
|
||||
out->reg.x = inputs[0].reg.x < inputs[1].reg.x ? 1.0f : 0.0f;
|
||||
out->reg.y = inputs[0].reg.y < inputs[1].reg.y ? 1.0f : 0.0f;
|
||||
out->reg.z = inputs[0].reg.z < inputs[1].reg.z ? 1.0f : 0.0f;
|
||||
out->reg.w = inputs[0].reg.w < inputs[1].reg.w ? 1.0f : 0.0f;
|
||||
}
|
||||
|
||||
void nv2a_vsh_cpu_sge(Nv2aVshRegister *out, const Nv2aVshRegister *a,
|
||||
const Nv2aVshRegister *b) {
|
||||
out->reg.x = a->reg.x >= b->reg.x ? 1.0f : 0.0f;
|
||||
out->reg.y = a->reg.y >= b->reg.y ? 1.0f : 0.0f;
|
||||
out->reg.z = a->reg.z >= b->reg.z ? 1.0f : 0.0f;
|
||||
out->reg.w = a->reg.w >= b->reg.w ? 1.0f : 0.0f;
|
||||
void nv2a_vsh_cpu_sge(Nv2aVshRegister *out, const Nv2aVshRegister *inputs) {
|
||||
out->reg.x = inputs[0].reg.x >= inputs[1].reg.x ? 1.0f : 0.0f;
|
||||
out->reg.y = inputs[0].reg.y >= inputs[1].reg.y ? 1.0f : 0.0f;
|
||||
out->reg.z = inputs[0].reg.z >= inputs[1].reg.z ? 1.0f : 0.0f;
|
||||
out->reg.w = inputs[0].reg.w >= inputs[1].reg.w ? 1.0f : 0.0f;
|
||||
}
|
||||
|
||||
void nv2a_vsh_cpu_rcp(Nv2aVshRegister *out, const Nv2aVshRegister *a) {
|
||||
void nv2a_vsh_cpu_rcp(Nv2aVshRegister *out, const Nv2aVshRegister *inputs) {
|
||||
float result =
|
||||
(a->reg.x == 1.0f ? 1.0f
|
||||
: (a->reg.x == 0.0f ? INFINITY : 1.0f / a->reg.x));
|
||||
(inputs[0].reg.x == 1.0f
|
||||
? 1.0f
|
||||
: (inputs[0].reg.x == 0.0f ? INFINITY : 1.0f / inputs[0].reg.x));
|
||||
out->reg.x = result;
|
||||
out->reg.y = result;
|
||||
out->reg.z = result;
|
||||
out->reg.w = result;
|
||||
}
|
||||
|
||||
void nv2a_vsh_cpu_rcc(Nv2aVshRegister *out, const Nv2aVshRegister *a) {
|
||||
void nv2a_vsh_cpu_rcc(Nv2aVshRegister *out, const Nv2aVshRegister *inputs) {
|
||||
// TODO: Validate this on HW.
|
||||
float result;
|
||||
if (a->reg.x == 1.0f) {
|
||||
if (inputs[0].reg.x == 1.0f) {
|
||||
result = 1.0f;
|
||||
} else {
|
||||
if (a->reg.x < -1.84467e19f) {
|
||||
if (inputs[0].reg.x < -1.84467e19f) {
|
||||
result = 1.0f / -1.84467e19f;
|
||||
} else if (a->reg.x > -5.42101e-20f && a->reg.x < 0.0f) {
|
||||
} else if (inputs[0].reg.x > -5.42101e-20f && inputs[0].reg.x < 0.0f) {
|
||||
result = 1.0f / -5.42101e-020f;
|
||||
} else if (a->reg.x >= 0 && a->reg.x < 5.42101e-20f) {
|
||||
} else if (inputs[0].reg.x >= 0 && inputs[0].reg.x < 5.42101e-20f) {
|
||||
result = 1.0f / 5.42101e-20f;
|
||||
} else if (a->reg.x > 1.84467e+19f) {
|
||||
} else if (inputs[0].reg.x > 1.84467e+19f) {
|
||||
result = 1.0f / 1.84467e+19f;
|
||||
} else {
|
||||
result = 1.0f / a->reg.x;
|
||||
result = 1.0f / inputs[0].reg.x;
|
||||
}
|
||||
}
|
||||
out->reg.x = result;
|
||||
@ -143,27 +144,28 @@ void nv2a_vsh_cpu_rcc(Nv2aVshRegister *out, const Nv2aVshRegister *a) {
|
||||
out->reg.w = result;
|
||||
}
|
||||
|
||||
void nv2a_vsh_cpu_rsq(Nv2aVshRegister *out, const Nv2aVshRegister *a) {
|
||||
float result = (a->reg.x == 1.0f
|
||||
? 1.0f
|
||||
: (a->reg.x == 0.0f ? INFINITY : 1.0f / sqrtf(a->reg.x)));
|
||||
void nv2a_vsh_cpu_rsq(Nv2aVshRegister *out, const Nv2aVshRegister *inputs) {
|
||||
float result =
|
||||
(inputs->reg.x == 1.0f
|
||||
? 1.0f
|
||||
: (inputs->reg.x == 0.0f ? INFINITY : 1.0f / sqrtf(inputs->reg.x)));
|
||||
out->reg.x = result;
|
||||
out->reg.y = result;
|
||||
out->reg.z = result;
|
||||
out->reg.w = result;
|
||||
}
|
||||
|
||||
void nv2a_vsh_cpu_exp(Nv2aVshRegister *out, const Nv2aVshRegister *a) {
|
||||
float tmp = floorf(a->reg.x);
|
||||
void nv2a_vsh_cpu_exp(Nv2aVshRegister *out, const Nv2aVshRegister *inputs) {
|
||||
float tmp = floorf(inputs->reg.x);
|
||||
out->reg.x = powf(2.0f, tmp);
|
||||
out->reg.y = a->reg.x - tmp;
|
||||
out->reg.z = powf(2.0f, a->reg.x);
|
||||
out->reg.y = inputs->reg.x - tmp;
|
||||
out->reg.z = powf(2.0f, inputs->reg.x);
|
||||
out->reg.w = 1.0f;
|
||||
}
|
||||
|
||||
void nv2a_vsh_cpu_log(Nv2aVshRegister *out, const Nv2aVshRegister *a) {
|
||||
void nv2a_vsh_cpu_log(Nv2aVshRegister *out, const Nv2aVshRegister *inputs) {
|
||||
// TODO: Validate this on HW.
|
||||
float tmp = fabsf(a->reg.x);
|
||||
float tmp = fabsf(inputs->reg.x);
|
||||
if (tmp == 0.0f) {
|
||||
out->reg.x = -INFINITY;
|
||||
out->reg.y = 1.0f;
|
||||
@ -182,7 +184,7 @@ void nv2a_vsh_cpu_log(Nv2aVshRegister *out, const Nv2aVshRegister *a) {
|
||||
out->reg.w = 1.0f;
|
||||
}
|
||||
|
||||
void nv2a_vsh_cpu_lit(Nv2aVshRegister *out, const Nv2aVshRegister *a) {
|
||||
void nv2a_vsh_cpu_lit(Nv2aVshRegister *out, const Nv2aVshRegister *inputs) {
|
||||
static const float kMax = 127.9961f;
|
||||
|
||||
out->reg.x = 1.0f;
|
||||
@ -190,11 +192,13 @@ void nv2a_vsh_cpu_lit(Nv2aVshRegister *out, const Nv2aVshRegister *a) {
|
||||
out->reg.z = 0.0f;
|
||||
out->reg.w = 1.0f;
|
||||
|
||||
float power = a->reg.w < -kMax ? -kMax : (a->reg.w > kMax ? kMax : a->reg.w);
|
||||
if (a->reg.x > 0.0f) {
|
||||
out->reg.y = a->reg.x;
|
||||
if (a->reg.y > 0.0f) {
|
||||
out->reg.z = powf(a->reg.y, power);
|
||||
float power = inputs->reg.w < -kMax
|
||||
? -kMax
|
||||
: (inputs->reg.w > kMax ? kMax : inputs->reg.w);
|
||||
if (inputs->reg.x > 0.0f) {
|
||||
out->reg.y = inputs->reg.x;
|
||||
if (inputs->reg.y > 0.0f) {
|
||||
out->reg.z = powf(inputs->reg.y, power);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -17,38 +17,28 @@ typedef union Nv2aVshRegister_ {
|
||||
float raw[4];
|
||||
} Nv2aVshRegister;
|
||||
|
||||
#define OP_1(name) \
|
||||
void nv2a_vsh_cpu_##name(Nv2aVshRegister *out, const Nv2aVshRegister *a)
|
||||
#define OP_2(name) \
|
||||
void nv2a_vsh_cpu_##name(Nv2aVshRegister *out, const Nv2aVshRegister *a, \
|
||||
const Nv2aVshRegister *b)
|
||||
#define OP_3(name) \
|
||||
void nv2a_vsh_cpu_##name(Nv2aVshRegister *out, const Nv2aVshRegister *a, \
|
||||
const Nv2aVshRegister *b, const Nv2aVshRegister *c)
|
||||
typedef void (*Nv2aVshCpuFunc)(Nv2aVshRegister *out,
|
||||
const Nv2aVshRegister *inputs);
|
||||
|
||||
OP_1(mov);
|
||||
OP_1(arl);
|
||||
OP_2(mul);
|
||||
OP_2(add);
|
||||
OP_3(mad);
|
||||
OP_2(dp3);
|
||||
OP_2(dph);
|
||||
OP_2(dp4);
|
||||
OP_2(dst);
|
||||
OP_2(min);
|
||||
OP_2(max);
|
||||
OP_2(slt);
|
||||
OP_2(sge);
|
||||
OP_1(rcp);
|
||||
OP_1(rcc);
|
||||
OP_1(rsq);
|
||||
OP_1(exp);
|
||||
OP_1(log);
|
||||
OP_1(lit);
|
||||
|
||||
#undef OP_1
|
||||
#undef OP_2
|
||||
#undef OP_3
|
||||
void nv2a_vsh_cpu_mov(Nv2aVshRegister *out, const Nv2aVshRegister *inputs);
|
||||
void nv2a_vsh_cpu_arl(Nv2aVshRegister *out, const Nv2aVshRegister *inputs);
|
||||
void nv2a_vsh_cpu_mul(Nv2aVshRegister *out, const Nv2aVshRegister *inputs);
|
||||
void nv2a_vsh_cpu_add(Nv2aVshRegister *out, const Nv2aVshRegister *inputs);
|
||||
void nv2a_vsh_cpu_mad(Nv2aVshRegister *out, const Nv2aVshRegister *inputs);
|
||||
void nv2a_vsh_cpu_dp3(Nv2aVshRegister *out, const Nv2aVshRegister *inputs);
|
||||
void nv2a_vsh_cpu_dph(Nv2aVshRegister *out, const Nv2aVshRegister *inputs);
|
||||
void nv2a_vsh_cpu_dp4(Nv2aVshRegister *out, const Nv2aVshRegister *inputs);
|
||||
void nv2a_vsh_cpu_dst(Nv2aVshRegister *out, const Nv2aVshRegister *inputs);
|
||||
void nv2a_vsh_cpu_min(Nv2aVshRegister *out, const Nv2aVshRegister *inputs);
|
||||
void nv2a_vsh_cpu_max(Nv2aVshRegister *out, const Nv2aVshRegister *inputs);
|
||||
void nv2a_vsh_cpu_slt(Nv2aVshRegister *out, const Nv2aVshRegister *inputs);
|
||||
void nv2a_vsh_cpu_sge(Nv2aVshRegister *out, const Nv2aVshRegister *inputs);
|
||||
void nv2a_vsh_cpu_rcp(Nv2aVshRegister *out, const Nv2aVshRegister *inputs);
|
||||
void nv2a_vsh_cpu_rcc(Nv2aVshRegister *out, const Nv2aVshRegister *inputs);
|
||||
void nv2a_vsh_cpu_rsq(Nv2aVshRegister *out, const Nv2aVshRegister *inputs);
|
||||
void nv2a_vsh_cpu_exp(Nv2aVshRegister *out, const Nv2aVshRegister *inputs);
|
||||
void nv2a_vsh_cpu_log(Nv2aVshRegister *out, const Nv2aVshRegister *inputs);
|
||||
void nv2a_vsh_cpu_lit(Nv2aVshRegister *out, const Nv2aVshRegister *inputs);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}; // extern "C"
|
||||
|
@ -3,6 +3,31 @@
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
|
||||
// clang-format off
|
||||
static Nv2aVshCpuFunc kDispatchTable[] = {
|
||||
NULL,
|
||||
nv2a_vsh_cpu_mov,
|
||||
nv2a_vsh_cpu_mul,
|
||||
nv2a_vsh_cpu_add,
|
||||
nv2a_vsh_cpu_mad,
|
||||
nv2a_vsh_cpu_dp3,
|
||||
nv2a_vsh_cpu_dph,
|
||||
nv2a_vsh_cpu_dp4,
|
||||
nv2a_vsh_cpu_dst,
|
||||
nv2a_vsh_cpu_min,
|
||||
nv2a_vsh_cpu_max,
|
||||
nv2a_vsh_cpu_slt,
|
||||
nv2a_vsh_cpu_sge,
|
||||
nv2a_vsh_cpu_arl,
|
||||
nv2a_vsh_cpu_rcp,
|
||||
nv2a_vsh_cpu_rcc,
|
||||
nv2a_vsh_cpu_rsq,
|
||||
nv2a_vsh_cpu_exp,
|
||||
nv2a_vsh_cpu_log,
|
||||
nv2a_vsh_cpu_lit,
|
||||
};
|
||||
// clang-format on
|
||||
|
||||
static inline void set_register(Nv2aVshRegister *out, const Nv2aVshRegister *in,
|
||||
const uint8_t *swizzle, bool negate) {
|
||||
float mult = negate ? -1.0f : 1.0f;
|
||||
@ -48,37 +73,13 @@ static inline void fetch_value(Nv2aVshRegister *out,
|
||||
static inline void apply_operation(Nv2aVshExecutionState *state,
|
||||
const Nv2aVshOperation *op,
|
||||
const Nv2aVshRegister *inputs) {
|
||||
Nv2aVshRegister output;
|
||||
|
||||
switch (op->opcode) {
|
||||
case NV2AOP_NOP:
|
||||
return;
|
||||
|
||||
case NV2AOP_MOV:
|
||||
nv2a_vsh_cpu_mov(&output, inputs);
|
||||
break;
|
||||
|
||||
case NV2AOP_MUL:
|
||||
case NV2AOP_ADD:
|
||||
case NV2AOP_MAD:
|
||||
case NV2AOP_DP3:
|
||||
case NV2AOP_DPH:
|
||||
case NV2AOP_DP4:
|
||||
case NV2AOP_DST:
|
||||
case NV2AOP_MIN:
|
||||
case NV2AOP_MAX:
|
||||
case NV2AOP_SLT:
|
||||
case NV2AOP_SGE:
|
||||
case NV2AOP_ARL:
|
||||
case NV2AOP_RCP:
|
||||
case NV2AOP_RCC:
|
||||
case NV2AOP_RSQ:
|
||||
case NV2AOP_EXP:
|
||||
case NV2AOP_LOG:
|
||||
case NV2AOP_LIT:
|
||||
break;
|
||||
if (op->opcode == NV2AOP_NOP) {
|
||||
return;
|
||||
}
|
||||
|
||||
Nv2aVshRegister result;
|
||||
kDispatchTable[op->opcode](&result, inputs);
|
||||
|
||||
const Nv2aVshOutput *out = op->outputs;
|
||||
for (uint32_t i = 0; i < 2; ++i, ++out) {
|
||||
Nv2aVshRegister *outreg;
|
||||
@ -90,7 +91,7 @@ static inline void apply_operation(Nv2aVshExecutionState *state,
|
||||
continue;
|
||||
|
||||
case NV2ART_OUTPUT:
|
||||
assert(out->index < 13 && "Invalid output register target.");
|
||||
assert(out->index < 13 && "Invalid result register target.");
|
||||
outreg = (Nv2aVshRegister *)(state->output_regs + out->index * 4);
|
||||
break;
|
||||
|
||||
@ -110,16 +111,16 @@ static inline void apply_operation(Nv2aVshExecutionState *state,
|
||||
}
|
||||
|
||||
if (out->writemask & NV2AWM_X) {
|
||||
outreg->reg.x = output.reg.x;
|
||||
outreg->reg.x = result.reg.x;
|
||||
}
|
||||
if (out->writemask & NV2AWM_Y) {
|
||||
outreg->reg.y = output.reg.y;
|
||||
outreg->reg.y = result.reg.y;
|
||||
}
|
||||
if (out->writemask & NV2AWM_Z) {
|
||||
outreg->reg.z = output.reg.z;
|
||||
outreg->reg.z = result.reg.z;
|
||||
}
|
||||
if (out->writemask & NV2AWM_W) {
|
||||
outreg->reg.w = output.reg.w;
|
||||
outreg->reg.w = result.reg.w;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user