mirror of
https://github.com/libretro/ppsspp.git
synced 2024-11-28 10:51:06 +00:00
softgpu: Use SSE for gouraud shading.
This commit is contained in:
parent
743854afc8
commit
dd140b73bb
50
GPU/Math3D.h
50
GPU/Math3D.h
@ -18,6 +18,11 @@
|
||||
#pragma once
|
||||
|
||||
#include <cmath>
|
||||
#include "Common/Common.h"
|
||||
|
||||
#if defined(_M_SSE)
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
namespace Math3D {
|
||||
|
||||
@ -36,9 +41,16 @@ template<typename T>
|
||||
class Vec2
|
||||
{
|
||||
public:
|
||||
struct
|
||||
union
|
||||
{
|
||||
T x,y;
|
||||
struct
|
||||
{
|
||||
T x,y;
|
||||
};
|
||||
#if defined(_M_SSE)
|
||||
__m128i ivec;
|
||||
__m128 vec;
|
||||
#endif
|
||||
};
|
||||
|
||||
T* AsArray() { return &x; }
|
||||
@ -47,6 +59,10 @@ public:
|
||||
Vec2() {}
|
||||
Vec2(const T a[2]) : x(a[0]), y(a[1]) {}
|
||||
Vec2(const T& _x, const T& _y) : x(_x), y(_y) {}
|
||||
#if defined(_M_SSE)
|
||||
Vec2(const __m128 &_vec) : vec(_vec) {}
|
||||
Vec2(const __m128i &_ivec) : ivec(_ivec) {}
|
||||
#endif
|
||||
|
||||
template<typename T2>
|
||||
Vec2<T2> Cast() const
|
||||
@ -164,9 +180,16 @@ template<typename T>
|
||||
class Vec3
|
||||
{
|
||||
public:
|
||||
struct
|
||||
union
|
||||
{
|
||||
T x,y,z;
|
||||
struct
|
||||
{
|
||||
T x,y,z;
|
||||
};
|
||||
#if defined(_M_SSE)
|
||||
__m128i ivec;
|
||||
__m128 vec;
|
||||
#endif
|
||||
};
|
||||
|
||||
T* AsArray() { return &x; }
|
||||
@ -176,6 +199,10 @@ public:
|
||||
Vec3(const T a[3]) : x(a[0]), y(a[1]), z(a[2]) {}
|
||||
Vec3(const T& _x, const T& _y, const T& _z) : x(_x), y(_y), z(_z) {}
|
||||
Vec3(const Vec2<T>& _xy, const T& _z) : x(_xy.x), y(_xy.y), z(_z) {}
|
||||
#if defined(_M_SSE)
|
||||
Vec3(const __m128 &_vec) : vec(_vec) {}
|
||||
Vec3(const __m128i &_ivec) : ivec(_ivec) {}
|
||||
#endif
|
||||
|
||||
template<typename T2>
|
||||
Vec3<T2> Cast() const
|
||||
@ -324,9 +351,16 @@ template<typename T>
|
||||
class Vec4
|
||||
{
|
||||
public:
|
||||
struct
|
||||
union
|
||||
{
|
||||
T x,y,z,w;
|
||||
struct
|
||||
{
|
||||
T x,y,z,w;
|
||||
};
|
||||
#if defined(_M_SSE)
|
||||
__m128i ivec;
|
||||
__m128 vec;
|
||||
#endif
|
||||
};
|
||||
|
||||
T* AsArray() { return &x; }
|
||||
@ -337,6 +371,10 @@ public:
|
||||
Vec4(const T& _x, const T& _y, const T& _z, const T& _w) : x(_x), y(_y), z(_z), w(_w) {}
|
||||
Vec4(const Vec2<T>& _xy, const T& _z, const T& _w) : x(_xy.x), y(_xy.y), z(_z), w(_w) {}
|
||||
Vec4(const Vec3<T>& _xyz, const T& _w) : x(_xyz.x), y(_xyz.y), z(_xyz.z), w(_w) {}
|
||||
#if defined(_M_SSE)
|
||||
Vec4(const __m128 &_vec) : vec(_vec) {}
|
||||
Vec4(const __m128i &_ivec) : ivec(_ivec) {}
|
||||
#endif
|
||||
|
||||
template<typename T2>
|
||||
Vec4<T2> Cast() const
|
||||
|
@ -30,6 +30,10 @@
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#if defined(_M_SSE)
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
extern FormatBuffer fb;
|
||||
extern FormatBuffer depthbuf;
|
||||
|
||||
@ -978,6 +982,46 @@ inline void ApplyTexturing(Vec3<int> &prim_color_rgb, int &prim_color_a, float s
|
||||
prim_color_a = out.a();
|
||||
}
|
||||
|
||||
#if defined(_M_SSE)
|
||||
// Per-lane weighted blend of three float vectors:
//   result = (c0 * w0 + c1 * w1 + c2 * w2) * wsum
// The integer weights are broadcast to all four lanes and converted to float
// before multiplying. wsum is applied as a multiplier (presumably the
// reciprocal of the weight sum - confirm against the caller).
static inline __m128 Interpolate(const __m128 &c0, const __m128 &c1, const __m128 &c2, int w0, int w1, int w2, float wsum) {
	// Broadcast each integer weight and convert it to float lanes.
	const __m128 weight0 = _mm_cvtepi32_ps(_mm_set1_epi32(w0));
	const __m128 weight1 = _mm_cvtepi32_ps(_mm_set1_epi32(w1));
	const __m128 weight2 = _mm_cvtepi32_ps(_mm_set1_epi32(w2));

	const __m128 term0 = _mm_mul_ps(c0, weight0);
	const __m128 term1 = _mm_mul_ps(c1, weight1);
	const __m128 term2 = _mm_mul_ps(c2, weight2);

	// Accumulate left to right so the float summation order stays (t0 + t1) + t2.
	const __m128 partial = _mm_add_ps(term0, term1);
	const __m128 total = _mm_add_ps(partial, term2);

	return _mm_mul_ps(total, _mm_set1_ps(wsum));
}
|
||||
|
||||
static inline __m128i Interpolate(const __m128i &c0, const __m128i &c1, const __m128i &c2, int w0, int w1, int w2, float wsum) {
|
||||
return _mm_cvtps_epi32(Interpolate(_mm_cvtepi32_ps(c0), _mm_cvtepi32_ps(c1), _mm_cvtepi32_ps(c2), w0, w1, w2, wsum));
|
||||
}
|
||||
#endif
|
||||
|
||||
// NOTE: When not casting color0 and color1 to float vectors, this code suffers from severe overflow issues.
|
||||
// Not sure if that should be regarded as a bug or if casting to float is a valid fix.
|
||||
|
||||
// Blends three four-component integer vectors:
//   (c0 * w0 + c1 * w1 + c2 * w2) * wsum, per component.
// With _M_SSE the work is done on the vectors' __m128i view; otherwise the
// components are cast to float, combined, and cast back to int.
static inline Vec4<int> Interpolate(const Vec4<int> &c0, const Vec4<int> &c1, const Vec4<int> &c2, int w0, int w1, int w2, float wsum) {
#if defined(_M_SSE)
	const __m128i blended = Interpolate(c0.ivec, c1.ivec, c2.ivec, w0, w1, w2, wsum);
	return Vec4<int>(blended);
#else
	// Portable path: weighted sum in float, then convert back to int.
	return ((c0.Cast<float>() * w0 +
	         c1.Cast<float>() * w1 +
	         c2.Cast<float>() * w2) * wsum).Cast<int>();
#endif
}
|
||||
|
||||
// Blends three three-component integer vectors:
//   (c0 * w0 + c1 * w1 + c2 * w2) * wsum, per component.
// With _M_SSE the work is done on the vectors' __m128i view, which is four
// lanes wide; only x, y and z are named members of Vec3, so the fourth lane
// carries whatever the union storage holds.
static inline Vec3<int> Interpolate(const Vec3<int> &c0, const Vec3<int> &c1, const Vec3<int> &c2, int w0, int w1, int w2, float wsum) {
#if defined(_M_SSE)
	const __m128i blended = Interpolate(c0.ivec, c1.ivec, c2.ivec, w0, w1, w2, wsum);
	return Vec3<int>(blended);
#else
	// Portable path: weighted sum in float, then convert back to int.
	return ((c0.Cast<float>() * w0 +
	         c1.Cast<float>() * w1 +
	         c2.Cast<float>() * w2) * wsum).Cast<int>();
#endif
}
|
||||
|
||||
// Blends three two-component float vectors:
//   (c0 * w0 + c1 * w1 + c2 * w2) * wsum, per component.
// With _M_SSE the work is done on the vectors' __m128 view, which is four
// lanes wide; only x and y are named members of Vec2, so the upper two lanes
// carry whatever the union storage holds.
static inline Vec2<float> Interpolate(const Vec2<float> &c0, const Vec2<float> &c1, const Vec2<float> &c2, int w0, int w1, int w2, float wsum) {
#if defined(_M_SSE)
	const __m128 blended = Interpolate(c0.vec, c1.vec, c2.vec, w0, w1, w2, wsum);
	return Vec2<float>(blended);
#else
	// Portable path: plain component-wise weighted sum.
	return (c0 * w0 +
	        c1 * w1 +
	        c2 * w2) * wsum;
#endif
}
|
||||
|
||||
template <bool clearMode>
|
||||
void DrawTriangleSlice(
|
||||
const VertexData& v0, const VertexData& v1, const VertexData& v2,
|
||||
@ -1067,16 +1111,11 @@ void DrawTriangleSlice(
|
||||
int prim_color_a = 0;
|
||||
Vec3<int> sec_color(0, 0, 0);
|
||||
if (gstate.getShadeMode() == GE_SHADE_GOURAUD && !clearMode) {
|
||||
// NOTE: When not casting color0 and color1 to float vectors, this code suffers from severe overflow issues.
|
||||
// Not sure if that should be regarded as a bug or if casting to float is a valid fix.
|
||||
// TODO: Is that the correct way to interpolate?
|
||||
prim_color_rgb = ((v0.color0.rgb().Cast<float>() * w0 +
|
||||
v1.color0.rgb().Cast<float>() * w1 +
|
||||
v2.color0.rgb().Cast<float>() * w2) * wsum).Cast<int>();
|
||||
prim_color_a = (int)(((float)v0.color0.a() * w0 + (float)v1.color0.a() * w1 + (float)v2.color0.a() * w2) * wsum);
|
||||
sec_color = ((v0.color1.Cast<float>() * w0 +
|
||||
v1.color1.Cast<float>() * w1 +
|
||||
v2.color1.Cast<float>() * w2) * wsum).Cast<int>();
|
||||
const Vec4<int> prim_color = Interpolate(v0.color0, v1.color0, v2.color0, w0, w1, w2, wsum);
|
||||
prim_color_rgb = prim_color.rgb();
|
||||
prim_color_a = prim_color.a();
|
||||
sec_color = Interpolate(v0.color1, v1.color1, v2.color1, w0, w1, w2, wsum);
|
||||
} else {
|
||||
prim_color_rgb = v2.color0.rgb();
|
||||
prim_color_a = v2.color0.a();
|
||||
|
Loading…
Reference in New Issue
Block a user