mirror of
https://github.com/hrydgard/ppsspp.git
synced 2024-11-23 05:19:56 +00:00
Just add a packed version of Vec3f.
This way we can have it aligned to memory where needed. I think it'd be better to avoid this if possible so that we can actually vectorize spline/etc. code. Fixes #5673.
This commit is contained in:
parent
38d0bac1df
commit
6630e45eff
@ -24,6 +24,6 @@
|
||||
struct SimpleVertex {
|
||||
float uv[2];
|
||||
u8 color[4];
|
||||
Vec3f nrm;
|
||||
Vec3f pos;
|
||||
Vec3Packedf nrm;
|
||||
Vec3Packedf pos;
|
||||
};
|
||||
|
@ -80,16 +80,16 @@ u32 TransformDrawEngine::NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inP
|
||||
float weights[8];
|
||||
reader.ReadWeights(weights);
|
||||
// Skinning
|
||||
Vec3f psum(0,0,0);
|
||||
Vec3f nsum(0,0,0);
|
||||
Vec3Packedf psum(0,0,0);
|
||||
Vec3Packedf nsum(0,0,0);
|
||||
for (int w = 0; w < numBoneWeights; w++) {
|
||||
if (weights[w] != 0.0f) {
|
||||
Vec3ByMatrix43(bpos, pos, gstate.boneMatrix+w*12);
|
||||
Vec3f tpos(bpos);
|
||||
Vec3Packedf tpos(bpos);
|
||||
psum += tpos * weights[w];
|
||||
|
||||
Norm3ByMatrix43(bnrm, nrm, gstate.boneMatrix+w*12);
|
||||
Vec3f tnorm(bnrm);
|
||||
Vec3Packedf tnorm(bnrm);
|
||||
nsum += tnorm * weights[w];
|
||||
}
|
||||
}
|
||||
@ -288,11 +288,11 @@ inline float bern2deriv(float x) { return 3 * (2 - 3 * x) * x; }
|
||||
// Evaluates 3*x^2, i.e. the derivative of x^3 with respect to x.
inline float bern3deriv(float x) {
	const float tripled = 3 * x;
	return tripled * x;
}
|
||||
|
||||
// http://en.wikipedia.org/wiki/Bernstein_polynomial
|
||||
Vec3f Bernstein3D(const Vec3f p0, const Vec3f p1, const Vec3f p2, const Vec3f p3, float x) {
|
||||
// Blends the four control points p0..p3 with the weights bern0(x)..bern3(x)
// and returns the weighted sum.
// See http://en.wikipedia.org/wiki/Bernstein_polynomial
Vec3Packedf Bernstein3D(const Vec3Packedf p0, const Vec3Packedf p1, const Vec3Packedf p2, const Vec3Packedf p3, float x) {
	// Accumulate left to right so the summation order matches p0 + p1 + p2 + p3.
	Vec3Packedf blended = p0 * bern0(x);
	blended += p1 * bern1(x);
	blended += p2 * bern2(x);
	blended += p3 * bern3(x);
	return blended;
}
|
||||
|
||||
Vec3f Bernstein3DDerivative(const Vec3f p0, const Vec3f p1, const Vec3f p2, const Vec3f p3, float x) {
|
||||
// Same weighted sum as Bernstein3D, but using the derivative weights
// bern0deriv(x)..bern3deriv(x) for the four control points p0..p3.
Vec3Packedf Bernstein3DDerivative(const Vec3Packedf p0, const Vec3Packedf p1, const Vec3Packedf p2, const Vec3Packedf p3, float x) {
	// Accumulate left to right so the summation order matches p0 + p1 + p2 + p3.
	Vec3Packedf blended = p0 * bern0deriv(x);
	blended += p1 * bern1deriv(x);
	blended += p2 * bern2deriv(x);
	blended += p3 * bern3deriv(x);
	return blended;
}
|
||||
|
||||
@ -379,7 +379,7 @@ void TesselateSplinePatch(u8 *&dest, int &count, const SplinePatch &spatch, u32
|
||||
// Generate normal if lighting is enabled (otherwise there's no point).
|
||||
// This is a really poor quality algorithm, we get facet normals.
|
||||
if (gstate.isLightingEnabled()) {
|
||||
Vec3f norm = Cross(v1.pos - v0.pos, v2.pos - v0.pos);
|
||||
Vec3Packedf norm = Cross(v1.pos - v0.pos, v2.pos - v0.pos);
|
||||
norm.Normalize();
|
||||
if (gstate.patchfacing & 1)
|
||||
norm *= -1.0f;
|
||||
@ -503,8 +503,8 @@ void TesselateSplinePatch(u8 *&dest, int &count, const SplinePatch &spatch, u32
|
||||
int r = std::min(patch_div_s, u + 1);
|
||||
int b = std::min(patch_div_t, v + 1);
|
||||
|
||||
const Vec3f &right = vertices[v * (patch_div_s + 1) + r].pos - vertices[v * (patch_div_s + 1) + l].pos;
|
||||
const Vec3f &down = vertices[b * (patch_div_s + 1) + u].pos - vertices[t * (patch_div_s + 1) + u].pos;
|
||||
const Vec3Packedf &right = vertices[v * (patch_div_s + 1) + r].pos - vertices[v * (patch_div_s + 1) + l].pos;
|
||||
const Vec3Packedf &down = vertices[b * (patch_div_s + 1) + u].pos - vertices[t * (patch_div_s + 1) + u].pos;
|
||||
|
||||
vertices[v * (patch_div_s + 1) + u].nrm = Cross(right, down).Normalized();
|
||||
if (gstate.patchfacing & 1) {
|
||||
@ -570,7 +570,7 @@ void TesselateBezierPatch(u8 *&dest, int &count, int tess_u, int tess_v, const B
|
||||
// Generate normal if lighting is enabled (otherwise there's no point).
|
||||
// This is a really poor quality algorithm, we get facet normals.
|
||||
if (gstate.isLightingEnabled()) {
|
||||
Vec3f norm = Cross(v1.pos - v0.pos, v2.pos - v0.pos);
|
||||
Vec3Packedf norm = Cross(v1.pos - v0.pos, v2.pos - v0.pos);
|
||||
norm.Normalize();
|
||||
if (gstate.patchfacing & 1)
|
||||
norm *= -1.0f;
|
||||
@ -591,10 +591,10 @@ void TesselateBezierPatch(u8 *&dest, int &count, int tess_u, int tess_v, const B
|
||||
// First compute all the vertices and put them in an array
|
||||
SimpleVertex *vertices = new SimpleVertex[(tess_u + 1) * (tess_v + 1)];
|
||||
|
||||
Vec3f *horiz = new Vec3f[(tess_u + 1) * 4];
|
||||
Vec3f *horiz2 = horiz + (tess_u + 1) * 1;
|
||||
Vec3f *horiz3 = horiz + (tess_u + 1) * 2;
|
||||
Vec3f *horiz4 = horiz + (tess_u + 1) * 3;
|
||||
Vec3Packedf *horiz = new Vec3Packedf[(tess_u + 1) * 4];
|
||||
Vec3Packedf *horiz2 = horiz + (tess_u + 1) * 1;
|
||||
Vec3Packedf *horiz3 = horiz + (tess_u + 1) * 2;
|
||||
Vec3Packedf *horiz4 = horiz + (tess_u + 1) * 3;
|
||||
|
||||
// Precompute the horizontal curves so we only have to evaluate the vertical ones.
|
||||
for (int i = 0; i < tess_u + 1; i++) {
|
||||
@ -615,20 +615,20 @@ void TesselateBezierPatch(u8 *&dest, int &count, int tess_u, int tess_v, const B
|
||||
float bv = v;
|
||||
|
||||
// TODO: Should be able to precompute the four curves per U, then just Bernstein per V. Will benefit large tesselation factors.
|
||||
const Vec3f &pos1 = horiz[tile_u];
|
||||
const Vec3f &pos2 = horiz2[tile_u];
|
||||
const Vec3f &pos3 = horiz3[tile_u];
|
||||
const Vec3f &pos4 = horiz4[tile_u];
|
||||
const Vec3Packedf &pos1 = horiz[tile_u];
|
||||
const Vec3Packedf &pos2 = horiz2[tile_u];
|
||||
const Vec3Packedf &pos3 = horiz3[tile_u];
|
||||
const Vec3Packedf &pos4 = horiz4[tile_u];
|
||||
|
||||
SimpleVertex &vert = vertices[tile_v * (tess_u + 1) + tile_u];
|
||||
|
||||
if (computeNormals) {
|
||||
Vec3f derivU1 = Bernstein3DDerivative(patch.points[0]->pos, patch.points[1]->pos, patch.points[2]->pos, patch.points[3]->pos, bu);
|
||||
Vec3f derivU2 = Bernstein3DDerivative(patch.points[4]->pos, patch.points[5]->pos, patch.points[6]->pos, patch.points[7]->pos, bu);
|
||||
Vec3f derivU3 = Bernstein3DDerivative(patch.points[8]->pos, patch.points[9]->pos, patch.points[10]->pos, patch.points[11]->pos, bu);
|
||||
Vec3f derivU4 = Bernstein3DDerivative(patch.points[12]->pos, patch.points[13]->pos, patch.points[14]->pos, patch.points[15]->pos, bu);
|
||||
Vec3f derivU = Bernstein3D(derivU1, derivU2, derivU3, derivU4, bv);
|
||||
Vec3f derivV = Bernstein3DDerivative(pos1, pos2, pos3, pos4, bv);
|
||||
Vec3Packedf derivU1 = Bernstein3DDerivative(patch.points[0]->pos, patch.points[1]->pos, patch.points[2]->pos, patch.points[3]->pos, bu);
|
||||
Vec3Packedf derivU2 = Bernstein3DDerivative(patch.points[4]->pos, patch.points[5]->pos, patch.points[6]->pos, patch.points[7]->pos, bu);
|
||||
Vec3Packedf derivU3 = Bernstein3DDerivative(patch.points[8]->pos, patch.points[9]->pos, patch.points[10]->pos, patch.points[11]->pos, bu);
|
||||
Vec3Packedf derivU4 = Bernstein3DDerivative(patch.points[12]->pos, patch.points[13]->pos, patch.points[14]->pos, patch.points[15]->pos, bu);
|
||||
Vec3Packedf derivU = Bernstein3D(derivU1, derivU2, derivU3, derivU4, bv);
|
||||
Vec3Packedf derivV = Bernstein3DDerivative(pos1, pos2, pos3, pos4, bv);
|
||||
|
||||
// TODO: Interpolate normals instead of generating them, if available?
|
||||
vert.nrm = Cross(derivU, derivV).Normalized();
|
||||
|
@ -167,6 +167,72 @@ float Vec3<float>::Normalize()
|
||||
return len;
|
||||
}
|
||||
|
||||
template<>
|
||||
Vec3Packed<float> Vec3Packed<float>::FromRGB(unsigned int rgb)
|
||||
{
|
||||
return Vec3Packed((rgb & 0xFF) * (1.0f/255.0f),
|
||||
((rgb >> 8) & 0xFF) * (1.0f/255.0f),
|
||||
((rgb >> 16) & 0xFF) * (1.0f/255.0f));
|
||||
}
|
||||
|
||||
template<>
|
||||
Vec3Packed<int> Vec3Packed<int>::FromRGB(unsigned int rgb)
|
||||
{
|
||||
return Vec3Packed(rgb & 0xFF, (rgb >> 8) & 0xFF, (rgb >> 16) & 0xFF);
|
||||
}
|
||||
|
||||
template<>
|
||||
unsigned int Vec3Packed<float>::ToRGB() const
|
||||
{
|
||||
return ((unsigned int)(r()*255.f)) +
|
||||
((unsigned int)(g()*255.f*256.f)) +
|
||||
((unsigned int)(b()*255.f*256.f*256.f));
|
||||
}
|
||||
|
||||
template<>
|
||||
unsigned int Vec3Packed<int>::ToRGB() const
|
||||
{
|
||||
return (r()&0xFF) | ((g()&0xFF)<<8) | ((b()&0xFF)<<16);
|
||||
}
|
||||
|
||||
template<>
|
||||
float Vec3Packed<float>::Length() const
|
||||
{
|
||||
return sqrtf(Length2());
|
||||
}
|
||||
|
||||
template<>
|
||||
void Vec3Packed<float>::SetLength(const float l)
|
||||
{
|
||||
(*this) *= l / Length();
|
||||
}
|
||||
|
||||
template<>
|
||||
Vec3Packed<float> Vec3Packed<float>::WithLength(const float l) const
|
||||
{
|
||||
return (*this) * l / Length();
|
||||
}
|
||||
|
||||
template<>
|
||||
float Vec3Packed<float>::Distance2To(Vec3Packed<float> &other)
|
||||
{
|
||||
return Vec3Packed<float>(other-(*this)).Length2();
|
||||
}
|
||||
|
||||
template<>
|
||||
Vec3Packed<float> Vec3Packed<float>::Normalized() const
|
||||
{
|
||||
return (*this) / Length();
|
||||
}
|
||||
|
||||
template<>
|
||||
float Vec3Packed<float>::Normalize()
|
||||
{
|
||||
float len = Length();
|
||||
(*this) = (*this)/len;
|
||||
return len;
|
||||
}
|
||||
|
||||
template<>
|
||||
Vec4<float> Vec4<float>::FromRGBA(unsigned int rgba)
|
||||
{
|
||||
|
170
GPU/Math3D.h
170
GPU/Math3D.h
@ -347,6 +347,169 @@ public:
|
||||
#undef _DEFINE_SWIZZLER2
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
class Vec3Packed
|
||||
{
|
||||
public:
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
T x,y,z;
|
||||
};
|
||||
};
|
||||
|
||||
T* AsArray() { return &x; }
|
||||
const T* AsArray() const { return &x; }
|
||||
|
||||
Vec3Packed() {}
|
||||
Vec3Packed(const T a[3]) : x(a[0]), y(a[1]), z(a[2]) {}
|
||||
Vec3Packed(const T& _x, const T& _y, const T& _z) : x(_x), y(_y), z(_z) {}
|
||||
Vec3Packed(const Vec2<T>& _xy, const T& _z) : x(_xy.x), y(_xy.y), z(_z) {}
|
||||
|
||||
template<typename T2>
|
||||
Vec3Packed<T2> Cast() const
|
||||
{
|
||||
return Vec3Packed<T2>((T2)x, (T2)y, (T2)z);
|
||||
}
|
||||
|
||||
// Only implemented for T=int and T=float
|
||||
static Vec3Packed FromRGB(unsigned int rgb);
|
||||
unsigned int ToRGB() const; // alpha bits set to zero
|
||||
|
||||
static Vec3Packed AssignToAll(const T& f)
|
||||
{
|
||||
return Vec3Packed<T>(f, f, f);
|
||||
}
|
||||
|
||||
void Write(T a[3])
|
||||
{
|
||||
a[0] = x; a[1] = y; a[2] = z;
|
||||
}
|
||||
|
||||
Vec3Packed operator +(const Vec3Packed &other) const
|
||||
{
|
||||
return Vec3Packed(x+other.x, y+other.y, z+other.z);
|
||||
}
|
||||
void operator += (const Vec3Packed &other)
|
||||
{
|
||||
x+=other.x; y+=other.y; z+=other.z;
|
||||
}
|
||||
Vec3Packed operator -(const Vec3Packed &other) const
|
||||
{
|
||||
return Vec3Packed(x-other.x, y-other.y, z-other.z);
|
||||
}
|
||||
void operator -= (const Vec3Packed &other)
|
||||
{
|
||||
x-=other.x; y-=other.y; z-=other.z;
|
||||
}
|
||||
Vec3Packed operator -() const
|
||||
{
|
||||
return Vec3Packed(-x,-y,-z);
|
||||
}
|
||||
Vec3Packed operator * (const Vec3Packed &other) const
|
||||
{
|
||||
return Vec3Packed(x*other.x, y*other.y, z*other.z);
|
||||
}
|
||||
template<typename V>
|
||||
Vec3Packed operator * (const V& f) const
|
||||
{
|
||||
return Vec3Packed(x*f,y*f,z*f);
|
||||
}
|
||||
template<typename V>
|
||||
void operator *= (const V& f)
|
||||
{
|
||||
x*=f; y*=f; z*=f;
|
||||
}
|
||||
template<typename V>
|
||||
Vec3Packed operator / (const V& f) const
|
||||
{
|
||||
return Vec3Packed(x/f,y/f,z/f);
|
||||
}
|
||||
template<typename V>
|
||||
void operator /= (const V& f)
|
||||
{
|
||||
*this = *this / f;
|
||||
}
|
||||
|
||||
T Length2() const
|
||||
{
|
||||
return x*x + y*y + z*z;
|
||||
}
|
||||
|
||||
Vec3Packed Clamp(const T &l, const T &h) const
|
||||
{
|
||||
return Vec3Packed(VecClamp(x, l, h), VecClamp(y, l, h), VecClamp(z, l, h));
|
||||
}
|
||||
|
||||
// Only implemented for T=float
|
||||
float Length() const;
|
||||
void SetLength(const float l);
|
||||
Vec3Packed WithLength(const float l) const;
|
||||
float Distance2To(Vec3Packed &other);
|
||||
Vec3Packed Normalized() const;
|
||||
float Normalize(); // returns the previous length, which is often useful
|
||||
|
||||
T& operator [] (int i) //allow vector[2] = 3 (vector.z=3)
|
||||
{
|
||||
return *((&x) + i);
|
||||
}
|
||||
T operator [] (const int i) const
|
||||
{
|
||||
return *((&x) + i);
|
||||
}
|
||||
|
||||
void SetZero()
|
||||
{
|
||||
x=0; y=0; z=0;
|
||||
}
|
||||
|
||||
// Common aliases: UVW (texel coordinates), RGB (colors), STQ (texture coordinates)
|
||||
T& u() { return x; }
|
||||
T& v() { return y; }
|
||||
T& w() { return z; }
|
||||
|
||||
T& r() { return x; }
|
||||
T& g() { return y; }
|
||||
T& b() { return z; }
|
||||
|
||||
T& s() { return x; }
|
||||
T& t() { return y; }
|
||||
T& q() { return z; }
|
||||
|
||||
const T& u() const { return x; }
|
||||
const T& v() const { return y; }
|
||||
const T& w() const { return z; }
|
||||
|
||||
const T& r() const { return x; }
|
||||
const T& g() const { return y; }
|
||||
const T& b() const { return z; }
|
||||
|
||||
const T& s() const { return x; }
|
||||
const T& t() const { return y; }
|
||||
const T& q() const { return z; }
|
||||
|
||||
// swizzlers - create a subvector of specific components
|
||||
// e.g. Vec2 uv() { return Vec2(x,y); }
|
||||
// _DEFINE_SWIZZLER2 defines a single such function, DEFINE_SWIZZLER2 defines all of them for all component names (x<->r) and permutations (xy<->yx)
|
||||
#define _DEFINE_SWIZZLER2(a, b, name) Vec2<T> name() const { return Vec2<T>(a, b); }
|
||||
#define DEFINE_SWIZZLER2(a, b, a2, b2, a3, b3, a4, b4) \
|
||||
_DEFINE_SWIZZLER2(a, b, a##b); \
|
||||
_DEFINE_SWIZZLER2(a, b, a2##b2); \
|
||||
_DEFINE_SWIZZLER2(a, b, a3##b3); \
|
||||
_DEFINE_SWIZZLER2(a, b, a4##b4); \
|
||||
_DEFINE_SWIZZLER2(b, a, b##a); \
|
||||
_DEFINE_SWIZZLER2(b, a, b2##a2); \
|
||||
_DEFINE_SWIZZLER2(b, a, b3##a3); \
|
||||
_DEFINE_SWIZZLER2(b, a, b4##a4);
|
||||
|
||||
DEFINE_SWIZZLER2(x, y, r, g, u, v, s, t);
|
||||
DEFINE_SWIZZLER2(x, z, r, b, u, w, s, q);
|
||||
DEFINE_SWIZZLER2(y, z, g, b, v, w, t, q);
|
||||
#undef DEFINE_SWIZZLER2
|
||||
#undef _DEFINE_SWIZZLER2
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
class Vec4
|
||||
{
|
||||
@ -629,6 +792,7 @@ private:
|
||||
}; // namespace Math3D
|
||||
|
||||
typedef Math3D::Vec3<float> Vec3f;
|
||||
// Float instantiation of Math3D::Vec3Packed.
typedef Math3D::Vec3Packed<float> Vec3Packedf;
|
||||
typedef Math3D::Vec4<float> Vec4f;
|
||||
|
||||
|
||||
@ -721,6 +885,12 @@ inline Vec3<T> Cross(const Vec3<T>& a, const Vec3<T>& b)
|
||||
return Vec3<T>(a.y*b.z-a.z*b.y, a.z*b.x-a.x*b.z, a.x*b.y-a.y*b.x);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
inline Vec3Packed<T> Cross(const Vec3Packed<T>& a, const Vec3Packed<T>& b)
|
||||
{
|
||||
return Vec3Packed<T>(a.y*b.z-a.z*b.y, a.z*b.x-a.x*b.z, a.x*b.y-a.y*b.x);
|
||||
}
|
||||
|
||||
}; // namespace Math3D
|
||||
|
||||
// linear interpolation via float: 0.0=begin, 1.0=end
|
||||
|
Loading…
Reference in New Issue
Block a user