Merge pull request from neobrain/softgpu2

Software Renderer
This commit is contained in:
Henrik Rydgård 2013-08-17 02:22:22 -07:00
commit fa59ba4e3d
25 changed files with 3127 additions and 20 deletions

@ -1021,6 +1021,16 @@ add_library(GPU OBJECT
GPU/Math3D.h
GPU/Null/NullGpu.cpp
GPU/Null/NullGpu.h
GPU/Software/Clipper.cpp
GPU/Software/Clipper.h
GPU/Software/Lighting.cpp
GPU/Software/Lighting.h
GPU/Software/Rasterizer.cpp
GPU/Software/Rasterizer.h
GPU/Software/SoftGpu.cpp
GPU/Software/SoftGpu.h
GPU/Software/TransformUnit.cpp
GPU/Software/TransformUnit.h
GPU/ge_constants.h)
setup_target_project(GPU GPU)

@ -108,6 +108,7 @@ void Config::Load(const char *iniFileName)
1
#endif
); // default is buffered rendering mode
graphics->Get("SoftwareRendering", &bSoftwareRendering, false);
graphics->Get("HardwareTransform", &bHardwareTransform, true);
graphics->Get("TextureFiltering", &iTexFiltering, 1);
graphics->Get("SSAA", &bAntiAliasing, 0);
@ -242,6 +243,7 @@ void Config::Save()
graphics->Set("ShowFPSCounter", iShowFPSCounter);
graphics->Set("ResolutionScale", iWindowZoom);
graphics->Set("RenderingMode", iRenderingMode);
graphics->Set("SoftwareRendering", bSoftwareRendering);
graphics->Set("HardwareTransform", bHardwareTransform);
graphics->Set("TextureFiltering", iTexFiltering);
graphics->Set("SSAA", bAntiAliasing);

@ -65,7 +65,8 @@ public:
std::string languageIni;
// GFX
bool bHardwareTransform;
bool bSoftwareRendering;
bool bHardwareTransform; // only used in the GLES backend
int iRenderingMode; // 0 = non-buffered rendering 1 = buffered rendering 2 = Read Framebuffer to memory (CPU) 3 = Read Framebuffer to memory (GPU)
int iTexFiltering; // 1 = off , 2 = nearest , 3 = linear , 4 = linear(CG)
#ifdef BLACKBERRY

@ -14,6 +14,11 @@ set(SRCS
GLES/VertexDecoder.cpp
GLES/VertexShaderGenerator.cpp
Null/NullGpu.cpp
Software/Clipper.cpp
Software/Lighting.cpp
Software/Rasterizer.cpp
Software/SoftGpu.cpp
Software/TransformUnit.cpp
)
set(SRCS ${SRCS})

@ -158,6 +158,12 @@
<ClInclude Include="GPUState.h" />
<ClInclude Include="Math3D.h" />
<ClInclude Include="Null\NullGpu.h" />
<ClInclude Include="Software\Clipper.h" />
<ClInclude Include="Software\Colors.h" />
<ClInclude Include="Software\Lighting.h" />
<ClInclude Include="Software\Rasterizer.h" />
<ClInclude Include="Software\SoftGpu.h" />
<ClInclude Include="Software\TransformUnit.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\ext\xbrz\xbrz.cpp" />
@ -179,6 +185,11 @@
<ClCompile Include="GPUState.cpp" />
<ClCompile Include="Math3D.cpp" />
<ClCompile Include="Null\NullGpu.cpp" />
<ClCompile Include="Software\Clipper.cpp" />
<ClCompile Include="Software\Lighting.cpp" />
<ClCompile Include="Software\Rasterizer.cpp" />
<ClCompile Include="Software\SoftGpu.cpp" />
<ClCompile Include="Software\TransformUnit.cpp" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\Common\Common.vcxproj">
@ -191,4 +202,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>
</Project>

@ -68,6 +68,24 @@
<ClInclude Include="GLES\TextureScaler.h">
<Filter>GLES</Filter>
</ClInclude>
<ClInclude Include="Software\Colors.h">
<Filter>Software</Filter>
</ClInclude>
<ClInclude Include="Software\Clipper.h">
<Filter>Software</Filter>
</ClInclude>
<ClInclude Include="Software\Lighting.h">
<Filter>Software</Filter>
</ClInclude>
<ClInclude Include="Software\Rasterizer.h">
<Filter>Software</Filter>
</ClInclude>
<ClInclude Include="Software\SoftGpu.h">
<Filter>Software</Filter>
</ClInclude>
<ClInclude Include="Software\TransformUnit.h">
<Filter>Software</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="Math3D.cpp">
@ -117,8 +135,23 @@
<ClCompile Include="GLES\TextureScaler.cpp">
<Filter>GLES</Filter>
</ClCompile>
<ClCompile Include="Software\Clipper.cpp">
<Filter>Software</Filter>
</ClCompile>
<ClCompile Include="Software\Lighting.cpp">
<Filter>Software</Filter>
</ClCompile>
<ClCompile Include="Software\Rasterizer.cpp">
<Filter>Software</Filter>
</ClCompile>
<ClCompile Include="Software\SoftGpu.cpp">
<Filter>Software</Filter>
</ClCompile>
<ClCompile Include="Software\TransformUnit.cpp">
<Filter>Software</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<None Include="CMakeLists.txt" />
</ItemGroup>
</Project>
</Project>

@ -20,6 +20,7 @@
#include "GLES/ShaderManager.h"
#include "GLES/DisplayListInterpreter.h"
#include "Null/NullGpu.h"
#include "Software/SoftGpu.h"
#include "../Core/CoreParameter.h"
#include "../Core/System.h"
@ -37,7 +38,7 @@ void GPU_Init() {
gpu = new GLES_GPU();
break;
case GPU_SOFTWARE:
gpu = new NullGPU();
gpu = new SoftGPU();
break;
}
}

@ -211,6 +211,10 @@ struct GPUgstate
float tgenMatrix[12];
float boneMatrix[12 * 8]; // Eight bone matrices.
// Framebuffer pixel format: the low two bits of framebufpixformat.
GEBufferFormat FrameBufFormat() const { return static_cast<GEBufferFormat>(framebufpixformat & 3); }
// Framebuffer stride: fbwidth masked with 0x7C0 (bits 6-10), so the result is
// always a multiple of 64. Presumably measured in pixels - TODO confirm.
int FrameBufStride() const { return fbwidth&0x7C0; }
// Depth buffer stride: zbwidth masked the same way as FrameBufStride().
int DepthBufStride() const { return zbwidth&0x7C0; }
// Pixel Pipeline
// True when bit 0 of clearmode is set.
bool isModeClear() const { return clearmode & 1; }
// True when bit 0 of fogEnable is set.
bool isFogEnabled() const { return fogEnable & 1; }
@ -221,6 +225,7 @@ struct GPUgstate
// The following accessors test individual bits of clearmode:
// 0x400 = depth write, 0x100 = color mask, 0x200 = alpha mask.
bool isClearModeDepthWriteEnabled() const { return (clearmode&0x400) != 0; }
bool isClearModeColorMask() const { return (clearmode&0x100) != 0; }
bool isClearModeAlphaMask() const { return (clearmode&0x200) != 0; }
// Builds a 32-bit mask from the same two bits: 0x00FFFFFF when the color bit (0x100)
// is set, OR'ed with 0xFF000000 when the alpha bit (0x200) is set.
u32 getClearModeColorMask() const { return ((clearmode&0x100) ? 0xFFFFFF : 0) | ((clearmode&0x200) ? 0xFF000000 : 0); } // TODO: Different convention than getColorMask, confusing!
// Blend
// Low four bits of blend.
int getBlendFuncA() const { return blend & 0xF; }
@ -322,10 +327,14 @@ struct GPUgstate
// Bits 16-23 of the specular color entry of light `chan` (lcolor[2 + chan*3]).
unsigned int getSpecularColorB(int chan) const { return (lcolor[2+chan*3]>>16)&0xFF; }
// UV gen
// NOTE(review): the two int-returning accessors below and the enum-returning ones
// after them have identical names and parameter lists; this view is a diff hunk, so
// presumably the int versions are the removed lines - confirm against the real header.
int getUVGenMode() const { return texmapmode & 3;} // 2 bits
int getUVProjMode() const { return (texmapmode >> 8) & 3;} // 2 bits
// Texture coordinate generation mode: bits 0-1 of texmapmode.
GETexMapMode getUVGenMode() const { return static_cast<GETexMapMode>(texmapmode & 3);} // 2 bits
// Projection mapping source: bits 8-9 of texmapmode.
GETexProjMapMode getUVProjMode() const { return static_cast<GETexProjMapMode>((texmapmode >> 8) & 3);} // 2 bits
// Bits 0-1 and bits 8-9 of texshade. Lighting compares these against a light index
// to decide which light drives the S and T coordinate in environment mapping.
int getUVLS0() const { return texshade & 0x3; } // 2 bits
int getUVLS1() const { return (texshade >> 8) & 0x3; } // 2 bits
// Bit 0 / bit 8 of texwrap: set when the S / T coordinate is clamped.
bool isTexCoordClampedS() const { return texwrap & 1; }
bool isTexCoordClampedT() const { return (texwrap >> 8) & 1; }
// Scissor rectangle fields: 10-bit values packed into scissor1 / scissor2.
int getScissorX1() const { return scissor1 & 0x3FF; }
int getScissorY1() const { return (scissor1 >> 10) & 0x3FF; }
int getScissorX2() const { return scissor2 & 0x3FF; }
@ -341,6 +350,9 @@ struct GPUgstate
// True when the GE_VTYPE_THROUGH bit is set in vertType.
bool isModeThrough() const { return (vertType & GE_VTYPE_THROUGH) != 0; }
// The weight-format bits of vertType, left in place (not shifted down).
int getWeightMask() const { return vertType & GE_VTYPE_WEIGHT_MASK; }
// The weight-count field of vertType, plus one.
int getNumBoneWeights() const { return 1 + ((vertType & GE_VTYPE_WEIGHTCOUNT_MASK) >> GE_VTYPE_WEIGHTCOUNT_SHIFT); }
// True when the weight-format bits differ from GE_VTYPE_WEIGHT_NONE.
bool isSkinningEnabled() const { return ((vertType & GE_VTYPE_WEIGHT_MASK) != GE_VTYPE_WEIGHT_NONE); }
// Patch primitive type: the low two bits of patchprimitive.
GEPatchPrimType getPatchPrimitiveType() const { return static_cast<GEPatchPrimType>(patchprimitive & 3); }
// Real data in the context ends here
};

292
GPU/Software/Clipper.cpp Normal file

@ -0,0 +1,292 @@
// Copyright (c) 2013- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include "../GPUState.h"
#include "Clipper.h"
#include "Rasterizer.h"
namespace Clipper {
// Clip-plane bit flags, one bit per frustum plane, plus the marker used for
// index-list slots that must not be drawn.
enum {
	SKIP_FLAG = -1,

	CLIP_POS_X_BIT = 1 << 0,
	CLIP_NEG_X_BIT = 1 << 1,
	CLIP_POS_Y_BIT = 1 << 2,
	CLIP_NEG_Y_BIT = 1 << 3,
	CLIP_POS_Z_BIT = 1 << 4,
	CLIP_NEG_Z_BIT = 1 << 5,
};
// Returns the set of CLIP_*_BIT flags for every plane of the -w..w clip volume
// that the clip-space position `v` lies outside of (0 when it is inside all six).
static inline int CalcClipMask(const ClipCoords& v)
{
	int outside = 0;

	outside |= (v.x > v.w)  ? CLIP_POS_X_BIT : 0;
	outside |= (v.x < -v.w) ? CLIP_NEG_X_BIT : 0;

	outside |= (v.y > v.w)  ? CLIP_POS_Y_BIT : 0;
	outside |= (v.y < -v.w) ? CLIP_NEG_Y_BIT : 0;

	outside |= (v.z > v.w)  ? CLIP_POS_Z_BIT : 0;
	outside |= (v.z < -v.w) ? CLIP_NEG_Z_BIT : 0;

	return outside;
}
// Writes a new vertex into slot `numVertices` of the caller's `Vertices` pointer
// array by calling Lerp(t, *Vertices[out], *Vertices[in]) on it, then increments
// `numVertices`. Requires a local named `Vertices` in the expanding scope, and
// `numVertices` must be a modifiable lvalue. Lerp is defined outside this file;
// presumably it interpolates all vertex attributes between the two vertices.
#define AddInterpolatedVertex(t, out, in, numVertices) \
{ \
Vertices[numVertices]->Lerp(t, *Vertices[out], *Vertices[in]); \
numVertices++; \
}
// True when exactly one of x and y is strictly positive (zero counts as
// non-positive). Arguments are parenthesized so that expressions containing
// lower-precedence operators (e.g. `a & 1`) are evaluated as a unit; note that
// each argument is still evaluated more than once.
#define DIFFERENT_SIGNS(x,y) (((x) <= 0 && (y) > 0) || ((x) > 0 && (y) <= 0))
// Dot product of the clip-space position of Vertices[I] with the plane
// coefficients (A, B, C, D). POLY_CLIP treats a result >= 0 as "inside".
// Requires a local named `Vertices` in the expanding scope.
#define CLIP_DOTPROD(I, A, B, C, D) \
(Vertices[I]->clippos.x * A + Vertices[I]->clippos.y * B + Vertices[I]->clippos.z * C + Vertices[I]->clippos.w * D)
// Clips the polygon held in `inlist` (n vertex indices) against the plane
// (A, B, C, D), but only when PLANE_BIT is set in `mask`.
//
// The macro walks every edge (idxPrev -> idx) of the closed polygon:
//  - the edge's start vertex is emitted to `outlist` when it is inside (dot >= 0);
//  - when the two endpoints lie on different sides, an interpolated vertex is
//    appended to `Vertices` and its index emitted as well.
// Afterwards `inlist` and `outlist` are swapped and `n` becomes the new count.
//
// Locals captured from the expanding scope: mask, inlist, outlist, n,
// numVertices, Vertices. inlist[n] is overwritten with inlist[0] to close the
// polygon, so the lists need room for one extra entry.
//
// If fewer than three vertices survive, the macro executes `continue`, so it
// must be expanded inside a loop; the remaining statements of that loop
// iteration are skipped.
#define POLY_CLIP( PLANE_BIT, A, B, C, D ) \
{ \
if (mask & PLANE_BIT) { \
int idxPrev = inlist[0]; \
float dpPrev = CLIP_DOTPROD(idxPrev, A, B, C, D ); \
int outcount = 0; \
\
inlist[n] = inlist[0]; \
for (int j = 1; j <= n; j++) { \
int idx = inlist[j]; \
float dp = CLIP_DOTPROD(idx, A, B, C, D ); \
if (dpPrev >= 0) { \
outlist[outcount++] = idxPrev; \
} \
\
if (DIFFERENT_SIGNS(dp, dpPrev)) { \
if (dp < 0) { \
float t = dp / (dp - dpPrev); \
AddInterpolatedVertex(t, idx, idxPrev, numVertices); \
} else { \
float t = dpPrev / (dpPrev - dp); \
AddInterpolatedVertex(t, idxPrev, idx, numVertices); \
} \
outlist[outcount++] = numVertices - 1; \
} \
\
idxPrev = idx; \
dpPrev = dp; \
} \
\
if (outcount < 3) \
continue; \
\
{ \
int *tmp = inlist; \
inlist = outlist; \
outlist = tmp; \
n = outcount; \
} \
} \
}
// Clips the line segment Vertices[0]..Vertices[1] against the plane (A, B, C, D)
// when PLANE_BIT is set in `mask`. An endpoint whose own mask (`mask0` / `mask1`)
// has the bit set and whose dot product is negative is overwritten in place:
// AddInterpolatedVertex(t, 1, 0, i) expands to
// Vertices[i]->Lerp(t, *Vertices[1], *Vertices[0]) followed by i++.
// dp0 is recomputed after vertex 0 may have been replaced.
//
// NOTE(review): `mask0` and `mask1` are not defined by any code visible in this
// file, and nothing visible expands this macro - it looks unfinished. Both
// interpolation factors are computed as dp1 / (dp1 - dp0); verify that formula
// before using this for line primitives.
#define CLIP_LINE(PLANE_BIT, A, B, C, D) \
{ \
if (mask & PLANE_BIT) { \
float dp0 = CLIP_DOTPROD(0, A, B, C, D ); \
float dp1 = CLIP_DOTPROD(1, A, B, C, D ); \
int i = 0; \
\
if (mask0 & PLANE_BIT) { \
if (dp0 < 0) { \
float t = dp1 / (dp1 - dp0); \
i = 0; \
AddInterpolatedVertex(t, 1, 0, i); \
} \
} \
dp0 = CLIP_DOTPROD(0, A, B, C, D ); \
\
if (mask1 & PLANE_BIT) { \
if (dp1 < 0) { \
float t = dp1 / (dp1- dp0); \
i = 1; \
AddInterpolatedVertex(t, 1, 0, i); \
} \
} \
} \
}
void ProcessQuad(const VertexData& v0, const VertexData& v1)
{
if (!gstate.isModeThrough()) {
VertexData buf[4];
buf[0].clippos = ClipCoords(v0.clippos.x, v0.clippos.y, v1.clippos.z, v1.clippos.w);
buf[0].texturecoords = v0.texturecoords;
buf[1].clippos = ClipCoords(v0.clippos.x, v1.clippos.y, v1.clippos.z, v1.clippos.w);
buf[1].texturecoords = Vec2<float>(v0.texturecoords.x, v1.texturecoords.y);
buf[2].clippos = ClipCoords(v1.clippos.x, v0.clippos.y, v1.clippos.z, v1.clippos.w);
buf[2].texturecoords = Vec2<float>(v1.texturecoords.x, v0.texturecoords.y);
buf[3] = v1;
// Color and depth values of second vertex are used for the whole rectangle
buf[0].color0 = buf[1].color0 = buf[2].color0 = buf[3].color0;
buf[0].color1 = buf[1].color1 = buf[2].color1 = buf[3].color1;
VertexData* topleft = &buf[0];
VertexData* topright = &buf[1];
VertexData* bottomleft = &buf[2];
VertexData* bottomright = &buf[3];
for (int i = 0; i < 4; ++i) {
if (buf[i].clippos.x < topleft->clippos.x && buf[i].clippos.y < topleft->clippos.y)
topleft = &buf[i];
if (buf[i].clippos.x > topright->clippos.x && buf[i].clippos.y < topright->clippos.y)
topright = &buf[i];
if (buf[i].clippos.x < bottomleft->clippos.x && buf[i].clippos.y > bottomleft->clippos.y)
bottomleft = &buf[i];
if (buf[i].clippos.x > bottomright->clippos.x && buf[i].clippos.y > bottomright->clippos.y)
bottomright = &buf[i];
}
ProcessTriangle(*topleft, *topright, *bottomright);
ProcessTriangle(*bottomright, *topright, *topleft);
ProcessTriangle(*bottomright, *bottomleft, *topleft);
ProcessTriangle(*topleft, *bottomleft, *bottomright);
} else {
// through mode handling
VertexData buf[4];
buf[0].screenpos = ScreenCoords(v0.screenpos.x, v0.screenpos.y, v1.screenpos.z);
buf[0].texturecoords = v0.texturecoords;
buf[1].screenpos = ScreenCoords(v0.screenpos.x, v1.screenpos.y, v1.screenpos.z);
buf[1].texturecoords = Vec2<float>(v0.texturecoords.x, v1.texturecoords.y);
buf[2].screenpos = ScreenCoords(v1.screenpos.x, v0.screenpos.y, v1.screenpos.z);
buf[2].texturecoords = Vec2<float>(v1.texturecoords.x, v0.texturecoords.y);
buf[3] = v1;
// Color and depth values of second vertex are used for the whole rectangle
buf[0].color0 = buf[1].color0 = buf[2].color0 = buf[3].color0;
buf[0].color1 = buf[1].color1 = buf[2].color1 = buf[3].color1;
buf[0].clippos.w = buf[1].clippos.w = buf[2].clippos.w = buf[3].clippos.w = 1.0f;
VertexData* topleft = &buf[0];
VertexData* topright = &buf[1];
VertexData* bottomleft = &buf[2];
VertexData* bottomright = &buf[3];
for (int i = 0; i < 4; ++i) {
if (buf[i].screenpos.x < topleft->screenpos.x && buf[i].screenpos.y < topleft->screenpos.y)
topleft = &buf[i];
if (buf[i].screenpos.x > topright->screenpos.x && buf[i].screenpos.y < topright->screenpos.y)
topright = &buf[i];
if (buf[i].screenpos.x < bottomleft->screenpos.x && buf[i].screenpos.y > bottomleft->screenpos.y)
bottomleft = &buf[i];
if (buf[i].screenpos.x > bottomright->screenpos.x && buf[i].screenpos.y > bottomright->screenpos.y)
bottomright = &buf[i];
}
Rasterizer::DrawTriangle(*topleft, *topright, *bottomright);
Rasterizer::DrawTriangle(*bottomright, *topright, *topleft);
Rasterizer::DrawTriangle(*bottomright, *bottomleft, *topleft);
Rasterizer::DrawTriangle(*topleft, *bottomleft, *bottomright);
}
}
// Clips one triangle against the view volume and hands the resulting
// triangle(s) to the rasterizer.
//
// v0, v1, v2: triangle vertices; their clippos fields are read for clipping.
// In through mode the triangle is forwarded to Rasterizer::DrawTriangle as is.
// Otherwise each output triangle gets its screenpos computed with
// TransformUnit::ClipToScreen before being drawn.
//
// The local names Vertices, mask, inlist, outlist, n and numVertices are
// referenced by the POLY_CLIP macro and must not be renamed.
void ProcessTriangle(VertexData& v0, VertexData& v1, VertexData& v2)
{
if (gstate.isModeThrough()) {
Rasterizer::DrawTriangle(v0, v1, v2);
return;
}
enum { NUM_CLIPPED_VERTICES = 33, NUM_INDICES = NUM_CLIPPED_VERTICES + 3 };
// Slots 0-2 refer to the input vertices, slots 3.. to scratch storage that
// receives the vertices generated by clipping.
VertexData* Vertices[NUM_INDICES];
VertexData ClippedVertices[NUM_CLIPPED_VERTICES];
for (int i = 0; i < NUM_CLIPPED_VERTICES; ++i)
Vertices[i+3] = &ClippedVertices[i];
// TODO: Change logic when it's a backface
Vertices[0] = &v0;
Vertices[1] = &v1;
Vertices[2] = &v2;
// Index list of triangles to draw, three entries per triangle. Entries past the
// explicit initializers are zero-initialized, but only the first numIndices
// entries are ever read.
int indices[NUM_INDICES] = { 0, 1, 2, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG,
SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG,
SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG, SKIP_FLAG };
int numIndices = 3;
// Union of the planes that at least one vertex lies outside of.
int mask = 0;
mask |= CalcClipMask(v0.clippos);
mask |= CalcClipMask(v1.clippos);
mask |= CalcClipMask(v2.clippos);
if (mask && (gstate.clipEnable & 0x1)) {
// discard if any vertex is outside the near clipping plane
if (mask & CLIP_NEG_Z_BIT)
return;
// This loop runs exactly once. It exists so that the `continue` inside POLY_CLIP
// (taken when the polygon is clipped away entirely) has a loop to act on.
for(int i = 0; i < 3; i += 3) {
int vlist[2][2*6+1];
int *inlist = vlist[0], *outlist = vlist[1];
int n = 3;
int numVertices = 3;
inlist[0] = 0;
inlist[1] = 1;
inlist[2] = 2;
// mark this triangle as unused in case it should be completely clipped
indices[0] = SKIP_FLAG;
indices[1] = SKIP_FLAG;
indices[2] = SKIP_FLAG;
POLY_CLIP(CLIP_POS_X_BIT, -1, 0, 0, 1);
POLY_CLIP(CLIP_NEG_X_BIT, 1, 0, 0, 1);
POLY_CLIP(CLIP_POS_Y_BIT, 0, -1, 0, 1);
POLY_CLIP(CLIP_NEG_Y_BIT, 0, 1, 0, 1);
// NOTE(review): the coefficients (0, 0, 0, 1) evaluate to clippos.w, whereas
// CalcClipMask sets CLIP_POS_Z_BIT for z > w - confirm whether this is intended.
POLY_CLIP(CLIP_POS_Z_BIT, 0, 0, 0, 1);
// Has no effect here: the function already returned above whenever
// CLIP_NEG_Z_BIT is set in mask.
POLY_CLIP(CLIP_NEG_Z_BIT, 0, 0, 1, 1);
// transform the poly in inlist into triangles
// (a fan around inlist[0]: (0, 1, 2), (0, 2, 3), ...)
indices[0] = inlist[0];
indices[1] = inlist[1];
indices[2] = inlist[2];
for (int j = 3; j < n; ++j) {
indices[numIndices++] = inlist[0];
indices[numIndices++] = inlist[j - 1];
indices[numIndices++] = inlist[j];
}
}
} else if (CalcClipMask(v0.clippos) & CalcClipMask(v1.clippos) & CalcClipMask(v2.clippos)) {
// If clipping is disabled, only discard the current primitive
// if all three vertices lie outside one of the clipping planes
return;
}
// Draw every triangle in the index list whose first index is not SKIP_FLAG.
// Vertices are copied so the caller's data is not modified by the screen transform.
for(int i = 0; i+3 <= numIndices; i+=3)
{
if(indices[i] != SKIP_FLAG)
{
VertexData data[3] = { *Vertices[indices[i]], *Vertices[indices[i+1]], *Vertices[indices[i+2]] };
data[0].screenpos = TransformUnit::ClipToScreen(data[0].clippos);
data[1].screenpos = TransformUnit::ClipToScreen(data[1].clippos);
data[2].screenpos = TransformUnit::ClipToScreen(data[2].clippos);
Rasterizer::DrawTriangle(data[0], data[1], data[2]);
}
}
}
} // namespace

27
GPU/Software/Clipper.h Normal file

@ -0,0 +1,27 @@
// Copyright (c) 2013- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#pragma once
#include "TransformUnit.h"
// Clipping stage of the software renderer.
namespace Clipper {
// Clips the triangle (v0, v1, v2) against the view volume and submits the
// resulting triangle(s) to the rasterizer. In through mode no clipping is done.
void ProcessTriangle(VertexData& v0, VertexData& v1, VertexData& v2);
// Draws the axis-aligned rectangle whose opposite corners are v0 and v1.
// Colors and depth of v1 are used for the whole rectangle.
void ProcessQuad(const VertexData& v0, const VertexData& v1);
}

104
GPU/Software/Colors.h Normal file

@ -0,0 +1,104 @@
// Copyright (c) 2013- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#pragma once
#include "CommonTypes.h"
// Expands a 16-bit 4444 pixel to 32-bit RGBA8888 (R in bits 0-7, A in bits 24-31).
//
// The 16-bit layout matches RGBA8888To4444() and the other 16-bit decoders in
// this file: R occupies the lowest nibble, then G, then B, and A the highest.
// Each 4-bit channel is widened by replicating the nibble (0xN -> 0xNN), so
// 0x0 maps to 0x00 and 0xF maps to 0xFF.
static inline u32 DecodeRGBA4444(u16 src)
{
	u32 r = (src >> 0) & 0x0F;
	u32 g = (src >> 4) & 0x0F;
	u32 b = (src >> 8) & 0x0F;
	u32 a = (src >> 12) & 0x0F;

	r = (r << 4) | r;
	g = (g << 4) | g;
	b = (b << 4) | b;
	a = (a << 4) | a;

	return (a << 24) | (b << 16) | (g << 8) | r;
}
// Expands a 16-bit 5551 pixel (R in bits 0-4, G in 5-9, B in 10-14, A in bit 15)
// to 32-bit RGBA8888 with R in the low byte. The 5-bit channels are widened by
// copying their top three bits into the low bits; alpha becomes 0x00 or 0xFF.
static inline u32 DecodeRGBA5551(u16 src)
{
	const u32 red5   = src & 0x1F;
	const u32 green5 = (src >> 5) & 0x1F;
	const u32 blue5  = (src >> 10) & 0x1F;

	const u32 red   = (red5 << 3) | (red5 >> 2);
	const u32 green = (green5 << 3) | (green5 >> 2);
	const u32 blue  = (blue5 << 3) | (blue5 >> 2);
	const u32 alpha = (src & 0x8000) ? 0xFFu : 0x00u;

	return red | (green << 8) | (blue << 16) | (alpha << 24);
}
// Expands a 16-bit 565 pixel (R in bits 0-4, G in 5-10, B in 11-15) to 32-bit
// RGBA8888 with R in the low byte. Channels are widened by copying their top bits
// into the low bits; alpha is always 0xFF.
static inline u32 DecodeRGB565(u16 src)
{
	const u32 red5   = src & 0x1F;
	const u32 green6 = (src >> 5) & 0x3F;
	const u32 blue5  = (src >> 11) & 0x1F;

	const u32 red   = (red5 << 3) | (red5 >> 2);
	const u32 green = (green6 << 2) | (green6 >> 4);
	const u32 blue  = (blue5 << 3) | (blue5 >> 2);

	return 0xFF000000u | (blue << 16) | (green << 8) | red;
}
// "Decodes" a 32-bit RGBA8888 pixel. Source and destination share the same layout
// (R in bits 0-7, G in 8-15, B in 16-23, A in 24-31), so the value is returned
// unchanged; the function exists for symmetry with the 16-bit decoders.
static inline u32 DecodeRGBA8888(u32 src)
{
	return src;
}
// Packs a 32-bit RGBA8888 pixel (R in the low byte) into 16-bit 565 by keeping the
// top 5/6/5 bits of R/G/B. R lands in bits 0-4, G in 5-10, B in 11-15; alpha is
// discarded.
static inline u16 RGBA8888To565(u32 value)
{
	const u32 red   = (value >> 3) & 0x001F;  // bits 3-7   -> 0-4
	const u32 green = (value >> 5) & 0x07E0;  // bits 10-15 -> 5-10
	const u32 blue  = (value >> 8) & 0xF800;  // bits 19-23 -> 11-15

	return (u16)(red | green | blue);
}
// Packs a 32-bit RGBA8888 pixel (R in the low byte) into 16-bit 5551 by keeping the
// top five bits of R/G/B and the top bit of A. R lands in bits 0-4, G in 5-9,
// B in 10-14 and A in bit 15.
static inline u16 RGBA8888To5551(u32 value)
{
	const u32 red   = (value >> 3) & 0x001F;   // bits 3-7   -> 0-4
	const u32 green = (value >> 6) & 0x03E0;   // bits 11-15 -> 5-9
	const u32 blue  = (value >> 9) & 0x7C00;   // bits 19-23 -> 10-14
	const u32 alpha = (value >> 16) & 0x8000;  // bit 31     -> 15

	return (u16)(red | green | blue | alpha);
}
// Packs a 32-bit RGBA8888 pixel (R in the low byte) into 16-bit 4444 by keeping the
// top nibble of every channel. R lands in bits 0-3, G in 4-7, B in 8-11 and
// A in 12-15.
static inline u16 RGBA8888To4444(u32 value)
{
	const u32 red   = (value >> 4) & 0x000F;   // bits 4-7   -> 0-3
	const u32 green = (value >> 8) & 0x00F0;   // bits 12-15 -> 4-7
	const u32 blue  = (value >> 12) & 0x0F00;  // bits 20-23 -> 8-11
	const u32 alpha = (value >> 16) & 0xF000;  // bits 28-31 -> 12-15

	return (u16)(red | green | blue | alpha);
}

167
GPU/Software/Lighting.cpp Normal file

@ -0,0 +1,167 @@
// Copyright (c) 2013- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include "../GPUState.h"
#include "Lighting.h"
namespace Lighting {
// Computes per-vertex lighting for `vertex` from the GE state in gstate.
//
// Reads vertex.color0, vertex.worldpos and vertex.worldnormal.
// Writes vertex.color0 and vertex.color1 when lighting is enabled, and
// vertex.texturecoords when the UV generation mode is environment mapping
// (this happens even if lighting itself is disabled).
// Color channels are handled as integers in the 0..255 range and clamped at the end.
void Process(VertexData& vertex)
{
// Material emissive color.
Vec3<int> mec = Vec3<int>(gstate.getMaterialEmissiveR(), gstate.getMaterialEmissiveG(), gstate.getMaterialEmissiveB());
// Material ambient color: taken from the vertex color when bit 0 of materialupdate is set.
Vec3<int> mac = (gstate.materialupdate&1)
? vertex.color0.rgb()
: Vec3<int>(gstate.getMaterialAmbientR(), gstate.getMaterialAmbientG(), gstate.getMaterialAmbientB());
// Start with emissive + material ambient modulated by the global ambient color.
Vec3<int> final_color = mec + mac * Vec3<int>(gstate.getAmbientR(), gstate.getAmbientG(), gstate.getAmbientB()) / 255;
Vec3<int> specular_color(0, 0, 0);
for (unsigned int light = 0; light < 4; ++light) {
// Always calculate texture coords from lighting results if environment mapping is active
// TODO: specular lighting should affect this, too!
// TODO: Not sure if this really should be done even if lighting is disabled altogether
if (gstate.getUVGenMode() == GE_TEXMAP_ENVIRONMENT_MAP) {
// Here the light position itself is used as the direction (the vertex position is
// not subtracted). The normalized dot product lies in -1..1 and is remapped to 0..1.
Vec3<float> L = Vec3<float>(getFloat24(gstate.lpos[3*light]&0xFFFFFF), getFloat24(gstate.lpos[3*light+1]&0xFFFFFF),getFloat24(gstate.lpos[3*light+2]&0xFFFFFF));
float diffuse_factor = Dot(L,vertex.worldnormal) / L.Length() / vertex.worldnormal.Length();
if (gstate.getUVLS0() == light)
vertex.texturecoords.s() = (diffuse_factor + 1.f) / 2.f;
if (gstate.getUVLS1() == light)
vertex.texturecoords.t() = (diffuse_factor + 1.f) / 2.f;
}
}
// With lighting disabled the vertex colors are left untouched.
if (!gstate.isLightingEnabled())
return;
for (unsigned int light = 0; light < 4; ++light) {
if (!gstate.isLightChanEnabled(light))
continue;
// L = vector from vertex to light source
// TODO: Should transfer the light positions to world/view space for these calculations
Vec3<float> L = Vec3<float>(getFloat24(gstate.lpos[3*light]&0xFFFFFF), getFloat24(gstate.lpos[3*light+1]&0xFFFFFF),getFloat24(gstate.lpos[3*light+2]&0xFFFFFF));
L -= vertex.worldpos;
// d = distance from the vertex to the light.
float d = L.Length();
// Constant, linear and quadratic attenuation coefficients.
float lka = getFloat24(gstate.latt[3*light]&0xFFFFFF);
float lkb = getFloat24(gstate.latt[3*light+1]&0xFFFFFF);
float lkc = getFloat24(gstate.latt[3*light+2]&0xFFFFFF);
// Distance attenuation is skipped for directional lights and clamped to 0..1 otherwise.
float att = 1.f;
if (!gstate.isDirectionalLight(light)) {
att = 1.f / (lka + lkb * d + lkc * d * d);
if (att > 1.f) att = 1.f;
if (att < 0.f) att = 0.f;
}
// Spot factor: cosine of the angle between the light-to-vertex vector and the spot
// direction, raised to the lconv exponent; zero when not above the cutoff value.
float spot = 1.f;
if (gstate.isSpotLight(light)) {
Vec3<float> dir = Vec3<float>(getFloat24(gstate.ldir[3*light]&0xFFFFFF), getFloat24(gstate.ldir[3*light+1]&0xFFFFFF),getFloat24(gstate.ldir[3*light+2]&0xFFFFFF));
float _spot = Dot(-L,dir) / d / dir.Length();
float cutoff = getFloat24(gstate.lcutoff[light]&0xFFFFFF);
if (_spot > cutoff) {
// NOTE(review): this assignment is overwritten by the pow() result two lines below.
spot = _spot;
float conv = getFloat24(gstate.lconv[light]&0xFFFFFF);
spot = pow(_spot, conv);
} else {
spot = 0.f;
}
}
// ambient lighting
// The right-hand sides below are floating point; adding them to the integer
// channels converts the sum back to int for every light.
Vec3<int> lac = Vec3<int>(gstate.getLightAmbientColorR(light), gstate.getLightAmbientColorG(light), gstate.getLightAmbientColorB(light));
final_color.r() += att * spot * lac.r() * mac.r() / 255;
final_color.g() += att * spot * lac.g() * mac.g() / 255;
final_color.b() += att * spot * lac.b() * mac.b() / 255;
// diffuse lighting
Vec3<int> ldc = Vec3<int>(gstate.getDiffuseColorR(light), gstate.getDiffuseColorG(light), gstate.getDiffuseColorB(light));
// Material diffuse color: taken from the vertex color when bit 1 of materialupdate is set.
Vec3<int> mdc = (gstate.materialupdate&2)
? vertex.color0.rgb()
: Vec3<int>(gstate.getMaterialDiffuseR(), gstate.getMaterialDiffuseG(), gstate.getMaterialDiffuseB());
// Cosine of the angle between the light vector and the vertex normal.
float diffuse_factor = Dot(L,vertex.worldnormal) / d / vertex.worldnormal.Length();
if (gstate.isUsingPoweredDiffuseLight(light)) {
float k = getFloat24(gstate.materialspecularcoef&0xFFFFFF);
diffuse_factor = pow(diffuse_factor, k);
}
// Surfaces facing away from the light receive no diffuse contribution.
if (diffuse_factor > 0.f) {
final_color.r() += att * spot * ldc.r() * mdc.r() * diffuse_factor / 255;
final_color.g() += att * spot * ldc.g() * mdc.g() * diffuse_factor / 255;
final_color.b() += att * spot * ldc.b() * mdc.b() * diffuse_factor / 255;
}
if (gstate.isUsingSpecularLight(light)) {
// H is the sum of the normalized eye vector (the view-space vector (0, 0, 1)
// taken through the inverse of the view matrix) and the normalized light vector.
Vec3<float> E(0.f, 0.f, 1.f);
Mat3x3<float> view_matrix(gstate.viewMatrix);
Vec3<float> worldE = view_matrix.Inverse() * (E - Vec3<float>(gstate.viewMatrix[9], gstate.viewMatrix[10], gstate.viewMatrix[11]));
Vec3<float> H = worldE / worldE.Length() + L / L.Length();
Vec3<int> lsc = Vec3<int>(gstate.getSpecularColorR(light), gstate.getSpecularColorG(light), gstate.getSpecularColorB(light));
// Material specular color: taken from the vertex color when bit 2 of materialupdate is set.
Vec3<int> msc = (gstate.materialupdate&4)
? vertex.color0.rgb()
: Vec3<int>(gstate.getMaterialSpecularR(), gstate.getMaterialSpecularG(), gstate.getMaterialSpecularB());
float specular_factor = Dot(H,vertex.worldnormal) / H.Length() / vertex.worldnormal.Length();
float k = getFloat24(gstate.materialspecularcoef&0xFFFFFF);
specular_factor = pow(specular_factor, k);
if (specular_factor > 0.f) {
specular_color.r() += att * spot * lsc.r() * msc.r() * specular_factor / 255;
specular_color.g() += att * spot * lsc.g() * msc.g() * specular_factor / 255;
specular_color.b() += att * spot * lsc.b() * msc.b() * specular_factor / 255;
}
}
}
// Write the accumulated color back; alpha is handled separately below.
vertex.color0.r() = final_color.r();
vertex.color0.g() = final_color.g();
vertex.color0.b() = final_color.b();
// The specular sum either goes to the secondary color or is added to the primary one.
if (gstate.isUsingSecondaryColor())
{
vertex.color1 = specular_color;
} else {
vertex.color0.r() += specular_color.r();
vertex.color0.g() += specular_color.g();
vertex.color0.b() += specular_color.b();
vertex.color1 = Vec3<int>(0, 0, 0);
}
// Alpha = global ambient alpha * material ambient alpha / 255; lights do not affect it.
int maa = (gstate.materialupdate&1) ? vertex.color0.a() : gstate.getMaterialAmbientA();
vertex.color0.a() = gstate.getAmbientA() * maa / 255;
// Clamp every channel to 0..255.
if (vertex.color0.r() > 255) vertex.color0.r() = 255;
if (vertex.color0.g() > 255) vertex.color0.g() = 255;
if (vertex.color0.b() > 255) vertex.color0.b() = 255;
if (vertex.color0.a() > 255) vertex.color0.a() = 255;
if (vertex.color1.r() > 255) vertex.color1.r() = 255;
if (vertex.color1.g() > 255) vertex.color1.g() = 255;
if (vertex.color1.b() > 255) vertex.color1.b() = 255;
if (vertex.color0.r() < 0) vertex.color0.r() = 0;
if (vertex.color0.g() < 0) vertex.color0.g() = 0;
if (vertex.color0.b() < 0) vertex.color0.b() = 0;
if (vertex.color0.a() < 0) vertex.color0.a() = 0;
if (vertex.color1.r() < 0) vertex.color1.r() = 0;
if (vertex.color1.g() < 0) vertex.color1.g() = 0;
if (vertex.color1.b() < 0) vertex.color1.b() = 0;
}
} // namespace

26
GPU/Software/Lighting.h Normal file

@ -0,0 +1,26 @@
// Copyright (c) 2013- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#pragma once
#include "TransformUnit.h"
// Per-vertex lighting stage of the software renderer.
namespace Lighting {
// Applies the lighting state in gstate to `vertex`, updating its colors in place
// (and its texture coordinates when environment mapping is the UV generation mode).
void Process(VertexData& vertex);
}

862
GPU/Software/Rasterizer.cpp Normal file

@ -0,0 +1,862 @@
// Copyright (c) 2013- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include <cmath>

#include "../../Core/MemMap.h"
#include "../GPUState.h"
#include "Rasterizer.h"
#include "Colors.h"
extern u8* fb;
extern u8* depthbuf;
extern u32 clut[4096];
namespace Rasterizer {
//static inline int orient2d(const DrawingCoords& v0, const DrawingCoords& v1, const DrawingCoords& v2)
// Returns the 2D cross product of the edges (v1 - v0) and (v2 - v0), using the
// x/y components truncated to int. The sign tells on which side of the line
// v0->v1 the point v2 lies; the result is zero when the three points are collinear.
static inline int orient2d(const ScreenCoords& v0, const ScreenCoords& v1, const ScreenCoords& v2)
{
	const int edge1_x = (int)v1.x - (int)v0.x;
	const int edge1_y = (int)v1.y - (int)v0.y;
	const int edge2_x = (int)v2.x - (int)v0.x;
	const int edge2_y = (int)v2.y - (int)v0.y;

	return edge1_x * edge2_y - edge1_y * edge2_x;
}
// Returns its argument unchanged.
// Presumably the amount an orient2d() edge value changes when stepping one pixel
// in X, given the edge's Y delta - callers are outside this view, TODO confirm.
static inline int orient2dIncX(int dY01)
{
	const int step = dY01;
	return step;
}
// Returns the negation of its argument.
// Presumably the amount an orient2d() edge value changes when stepping one pixel
// in Y, given the edge's X delta - callers are outside this view, TODO confirm.
static inline int orient2dIncY(int dX01)
{
	const int step = -dX01;
	return step;
}
// Returns the byte offset of texel (u, v) from the start of a texture.
//
// texel_size_bits: size of one texel in bits.
// row_pitch_bits:  row pitch as passed by the caller (SampleNearest passes the
//                  texture buffer width multiplied by 8).
// When bit 0 of gstate.texmode is clear the texture is addressed linearly;
// otherwise it is addressed as 32-bit tiles grouped into blocks of 4 x 8 tiles.
static inline int GetPixelDataOffset(unsigned int texel_size_bits, unsigned int row_pitch_bits, unsigned int u, unsigned int v)
{
	const bool swizzled = (gstate.texmode & 1) != 0;
	if (!swizzled) {
		const unsigned int row_offset = v * row_pitch_bits * texel_size_bits / 8 / 8;
		const unsigned int column_offset = u * texel_size_bits / 8;
		return row_offset + column_offset;
	}

	const int tile_bits = 32;
	const int block_tiles_x = 4;
	const int block_tiles_y = 8;

	const int tile_texels = tile_bits / texel_size_bits;
	const int tile_column = u / tile_texels;

	// Position of the tile inside its block, plus the first tile of that block.
	const unsigned int row_in_block = (v % block_tiles_y) * (block_tiles_x);
	// TODO: not sure if the *texel_size_bits/8 factor is correct
	const unsigned int block_row_base = (v / block_tiles_y) * ((row_pitch_bits * texel_size_bits / 8 / tile_bits) * block_tiles_y);
	const int column_in_block = (tile_column % block_tiles_x);
	const int block_column_base = (tile_column / block_tiles_x) * (block_tiles_x * block_tiles_y);

	const int tile_index = row_in_block + block_row_base + column_in_block + block_column_base;

	// TODO: HACK: for some reason, the second part needs to be diviced by two for CLUT4 textures to work properly.
	const int tile_offset = tile_index * tile_bits / 8;
	const unsigned int offset_in_tile = ((u % (tile_bits / texel_size_bits))) / ((texel_size_bits == 4) ? 2 : 1);
	return tile_offset + offset_in_tile;
}
// Fetches palette entry `index` from the global CLUT and decodes it to RGBA8888
// according to the current palette format.
//
// When bit 8 of gstate.texmode is set, each mipmap `level` uses its own group of
// 16 entries (offset level * 16); otherwise all levels share the same entries.
// Returns 0 and logs an error for an unknown palette format.
static inline u32 LookupColor(unsigned int index, unsigned int level)
{
	const bool sharedAcrossLevels = (gstate.texmode & 0x100) == 0;
	const int levelOffset = sharedAcrossLevels ? 0 : level * 16;
	const unsigned int entry = index + levelOffset;

	// The 16-bit formats view the same table as an array of u16.
	const u16* const clut16 = reinterpret_cast<u16*>(clut);

	// TODO: No idea if these bswaps are correct
	switch (gstate.getClutPaletteFormat()) {
	case GE_TFMT_5650:
		return DecodeRGB565(clut16[entry]);

	case GE_TFMT_5551:
		return DecodeRGBA5551(clut16[entry]);

	case GE_TFMT_4444:
		return DecodeRGBA4444(clut16[entry]);

	case GE_TFMT_8888:
		return DecodeRGBA8888(clut[entry]);

	default:
		break;
	}

	ERROR_LOG(G3D, "Unsupported palette format: %x", gstate.getClutPaletteFormat());
	return 0;
}
// Converts a raw texel value into a CLUT index: the value is shifted right by the
// CLUT index shift, masked with the CLUT index mask, and OR'ed with the CLUT
// start position.
static inline u32 GetClutIndex(u32 index) {
	const u8 shift = gstate.getClutIndexShift();
	const u32 mask = gstate.getClutIndexMask();
	const u32 base = gstate.getClutIndexStartPos();

	u32 result = index >> shift;
	result &= mask;
	result |= base;
	return result;
}
// Brings one texture coordinate into the 0..1 range.
// Clamp mode limits it to [0, 1]; wrap mode keeps the fractional part, which is
// non-negative for negative inputs as well (e.g. -0.25 becomes 0.75).
static inline float WrapOrClampTexCoord(float coord, bool clamp)
{
	if (clamp) {
		if (coord > 1.0f) coord = 1.0f;
		if (coord < 0.0f) coord = 0.0f;
	} else {
		// fmod() keeps the sign of its argument, so it would leave negative
		// coordinates negative; subtracting the floor wraps them properly.
		coord -= floor(coord);
	}

	// Also catches NaN, for which every comparison is false.
	if (!(coord >= 0.0f))
		coord = 0.0f;
	return coord;
}

// Scales a normalized coordinate to a texel index inside [0, size - 1].
static inline unsigned int TexCoordToTexel(float coord, int size)
{
	int texel = (int)(coord * size);
	// A coordinate of exactly 1.0 would otherwise address one texel past the edge.
	if (texel > size - 1) texel = size - 1;
	if (texel < 0) texel = 0;
	return (unsigned int)texel;
}

// Converts the texture coordinates (s, t) into integer texel coordinates (u, v)
// for mipmap `level`.
//
// The coordinates are first scaled and offset by the texture scale/offset
// registers, then clamped or wrapped per axis depending on the texture wrap
// state, and finally multiplied by the texture width/height (both powers of two
// taken from gstate.texsize[level]).
// On return u is in [0, width - 1] and v is in [0, height - 1].
static inline void GetTexelCoordinates(int level, float s, float t, unsigned int& u, unsigned int& v)
{
	s *= getFloat24(gstate.texscaleu);
	t *= getFloat24(gstate.texscalev);

	s += getFloat24(gstate.texoffsetu);
	t += getFloat24(gstate.texoffsetv);

	// TODO: Is this really only necessary for UV mapping?
	s = WrapOrClampTexCoord(s, gstate.isTexCoordClampedS());
	t = WrapOrClampTexCoord(t, gstate.isTexCoordClampedT());

	const int width = 1 << (gstate.texsize[level] & 0xf);
	const int height = 1 << ((gstate.texsize[level]>>8) & 0xf);

	u = TexCoordToTexel(s, width);
	v = TexCoordToTexel(t, height);
}
// Computes the texture coordinates (s, t) of a pixel inside the triangle
// (v0, v1, v2), where w0, w1, w2 are the pixel's weights for the three vertices.
//
// - Texture-coordinate and environment-map modes: the vertex texture coordinates
//   are interpolated with each weight multiplied by 1 / clippos.w of its vertex.
// - Texture-matrix mode: the interpolated model position is transformed by the
//   texture generation matrix and s, t are divided by the third component.
// - Any other mode: an error is logged and s, t are left unmodified.
static inline void GetTextureCoordinates(const VertexData& v0, const VertexData& v1, const VertexData& v2, int w0, int w1, int w2, float& s, float& t)
{
	switch (gstate.getUVGenMode()) {
	case GE_TEXMAP_TEXTURE_COORDS:
	case GE_TEXMAP_ENVIRONMENT_MAP:
		{
			// TODO: What happens if vertex has no texture coordinates?
			// Note that for environment mapping, texture coordinates have been calculated during lighting
			const float inv_w0 = 1.f / v0.clippos.w;
			const float inv_w1 = 1.f / v1.clippos.w;
			const float inv_w2 = 1.f / v2.clippos.w;
			const float norm = inv_w0 * w0 + inv_w1 * w1 + inv_w2 * w2;

			s = (v0.texturecoords.s() * inv_w0 * w0 + v1.texturecoords.s() * inv_w1 * w1 + v2.texturecoords.s() * inv_w2 * w2) / norm;
			t = (v0.texturecoords.t() * inv_w0 * w0 + v1.texturecoords.t() * inv_w1 * w1 + v2.texturecoords.t() * inv_w2 * w2) / norm;
			return;
		}

	case GE_TEXMAP_TEXTURE_MATRIX:
		{
			// projection mapping, TODO: Move this code to TransformUnit!
			Vec3<float> source;
			if (gstate.getUVProjMode() == GE_PROJMAP_POSITION) {
				source = ((v0.modelpos * w0 + v1.modelpos * w1 + v2.modelpos * w2) / (w0+w1+w2));
			} else {
				// `source` keeps whatever Vec3's default constructor produced.
				ERROR_LOG(G3D, "Unsupported UV projection mode %x", gstate.getUVProjMode());
			}

			Mat3x3<float> tgen(gstate.tgenMatrix);
			Vec3<float> stq = tgen * source + Vec3<float>(gstate.tgenMatrix[9], gstate.tgenMatrix[10], gstate.tgenMatrix[11]);
			s = stq.x/stq.z;
			t = stq.y/stq.z;
			return;
		}

	default:
		ERROR_LOG(G3D, "Unsupported texture mapping mode %x!", gstate.getUVGenMode());
		return;
	}
}
// Fetches the texel at integer coordinates (u, v) of the given mipmap level
// and returns it as a decoded 32-bit color. Direct color formats are decoded
// in place; CLUT formats read an index and resolve it through the palette.
// Unknown formats log an error and yield 0.
static inline u32 SampleNearest(int level, unsigned int u, unsigned int v)
{
	const GETextureFormat format = gstate.getTextureFormat();

	// The upper address bits are stored alongside the buffer width.
	const u32 address = (gstate.texaddr[level] & 0xFFFFF0) | ((gstate.texbufwidth[level] << 8) & 0x0F000000);
	u8* texel = (u8*)Memory::GetPointer(address); // TODO: not sure if this is the right place to load from...?

	// Special rules for kernel textures (PPGe), TODO: Verify!
	int bufwidth;
	if (address < PSP_GetUserMemoryBase())
		bufwidth = gstate.texbufwidth[level] & 0x1FFF;
	else
		bufwidth = gstate.texbufwidth[level] & 0x7FF;

	// TODO: Should probably check if textures are aligned properly...

	switch (format) {
	case GE_TFMT_4444:
		texel += GetPixelDataOffset(16, bufwidth*8, u, v);
		return DecodeRGBA4444(*(u16*)texel);

	case GE_TFMT_5551:
		texel += GetPixelDataOffset(16, bufwidth*8, u, v);
		return DecodeRGBA5551(*(u16*)texel);

	case GE_TFMT_5650:
		texel += GetPixelDataOffset(16, bufwidth*8, u, v);
		return DecodeRGB565(*(u16*)texel);

	case GE_TFMT_8888:
		texel += GetPixelDataOffset(32, bufwidth*8, u, v);
		return DecodeRGBA8888(*(u32*)texel);

	case GE_TFMT_CLUT32:
		{
			texel += GetPixelDataOffset(32, bufwidth*8, u, v);
			// Assemble the index byte by byte, least significant byte first.
			u32 palette_index = texel[0] + (texel[1] << 8) + (texel[2] << 16) + (texel[3] << 24);
			return LookupColor(GetClutIndex(palette_index), level);
		}

	case GE_TFMT_CLUT16:
		{
			texel += GetPixelDataOffset(16, bufwidth*8, u, v);
			u16 palette_index = texel[0] + (texel[1] << 8);
			return LookupColor(GetClutIndex(palette_index), level);
		}

	case GE_TFMT_CLUT8:
		{
			texel += GetPixelDataOffset(8, bufwidth*8, u, v);
			u8 palette_index = *texel;
			return LookupColor(GetClutIndex(palette_index), level);
		}

	case GE_TFMT_CLUT4:
		{
			texel += GetPixelDataOffset(4, bufwidth*8, u, v);
			// Two texels share a byte: odd columns use the high nibble.
			u8 palette_index = (u & 1) ? (texel[0] >> 4) : (texel[0] & 0xF);
			return LookupColor(GetClutIndex(palette_index), level);
		}

	default:
		ERROR_LOG(G3D, "Unsupported texture format: %x", format);
		return 0;
	}
}
// NOTE: The framebuffer accessors below likely aren't endian safe

// Reads the framebuffer pixel at (x, y). 16-bit formats are expanded to a
// 32-bit color; 8888 pixels are returned as stored. Returns 0 for an
// unrecognized framebuffer format.
static inline u32 GetPixelColor(int x, int y)
{
	switch (gstate.FrameBufFormat()) {
	case GE_FORMAT_8888:
		{
			const u32* pixel = (const u32*)&fb[4*x + 4*y*gstate.FrameBufStride()];
			return *pixel;
		}

	case GE_FORMAT_4444:
		{
			const u16* pixel = (const u16*)&fb[2*x + 2*y*gstate.FrameBufStride()];
			return DecodeRGBA4444(*pixel);
		}

	case GE_FORMAT_5551:
		{
			const u16* pixel = (const u16*)&fb[2*x + 2*y*gstate.FrameBufStride()];
			return DecodeRGBA5551(*pixel);
		}

	case GE_FORMAT_565:
		{
			const u16* pixel = (const u16*)&fb[2*x + 2*y*gstate.FrameBufStride()];
			return DecodeRGB565(*pixel);
		}
	}
	return 0;
}
static inline void SetPixelColor(int x, int y, u32 value)
{
switch (gstate.FrameBufFormat()) {
case GE_FORMAT_565:
*(u16*)&fb[2*x + 2*y*gstate.FrameBufStride()] = RGBA8888To565(value);
break;
case GE_FORMAT_5551:
*(u16*)&fb[2*x + 2*y*gstate.FrameBufStride()] = RGBA8888To5551(value);
break;
case GE_FORMAT_4444:
*(u16*)&fb[2*x + 2*y*gstate.FrameBufStride()] = RGBA8888To4444(value);
break;
case GE_FORMAT_8888:
*(u32*)&fb[4*x + 4*y*gstate.FrameBufStride()] = value;
break;
}
}
// Returns the 16-bit depth value stored for pixel (x, y) in the depth buffer.
static inline u16 GetPixelDepth(int x, int y)
{
	const u16* entry = (const u16*)&depthbuf[2*x + 2*y*gstate.DepthBufStride()];
	return *entry;
}
static inline void SetPixelDepth(int x, int y, u16 value)
{
*(u16*)&depthbuf[2*x + 2*y*gstate.DepthBufStride()] = value;
}
// Reads the stencil value of pixel (x, y). The stencil is taken from the top
// bit of the framebuffer pixel and reported as either 0 or 0xFF. The 565
// format always reports 0.
static inline u8 GetPixelStencil(int x, int y)
{
	if (gstate.FrameBufFormat() == GE_FORMAT_565) {
		// TODO: Should we return 0xFF instead here?
		return 0;
	}

	if (gstate.FrameBufFormat() == GE_FORMAT_8888) {
		const u32 pixel = *(u32*)&fb[4*x + 4*y*gstate.FrameBufStride()];
		return ((pixel & 0x80000000) != 0) ? 0xFF : 0;
	}

	// Remaining formats use 16 bits per pixel.
	const u16 pixel = *(u16*)&fb[2*x + 2*y*gstate.FrameBufStride()];
	return ((pixel & 0x8000) != 0) ? 0xFF : 0;
}
static inline void SetPixelStencil(int x, int y, u8 value)
{
if (gstate.FrameBufFormat() == GE_FORMAT_565) {
// Do nothing
} else if (gstate.FrameBufFormat() != GE_FORMAT_8888) {
*(u16*)&fb[2*x + 2*y*gstate.FrameBufStride()] = (*(u16*)&fb[2*x + 2*y*gstate.FrameBufStride()] & ~0x8000) | ((value&0x80)<<8);
} else {
*(u32*)&fb[4*x + 4*y*gstate.FrameBufStride()] = (*(u32*)&fb[4*x + 4*y*gstate.FrameBufStride()] & ~0x80000000) | ((value&0x80)<<24);
}
}
static inline bool DepthTestPassed(int x, int y, u16 z)
{
u16 reference_z = GetPixelDepth(x, y);
if (gstate.isModeClear())
return true;
switch (gstate.getDepthTestFunc()) {
case GE_COMP_NEVER:
return false;
case GE_COMP_ALWAYS:
return true;
case GE_COMP_EQUAL:
return (z == reference_z);
case GE_COMP_NOTEQUAL:
return (z != reference_z);
case GE_COMP_LESS:
return (z < reference_z);
case GE_COMP_LEQUAL:
return (z <= reference_z);
case GE_COMP_GREATER:
return (z > reference_z);
case GE_COMP_GEQUAL:
return (z >= reference_z);
default:
return 0;
}
}
static inline bool IsRightSideOrFlatBottomLine(const Vec2<u10>& vertex, const Vec2<u10>& line1, const Vec2<u10>& line2)
{
if (line1.y == line2.y) {
// just check if vertex is above us => bottom line parallel to x-axis
return vertex.y < line1.y;
} else {
// check if vertex is on our left => right side
return vertex.x < line1.x + ((int)line2.x - (int)line1.x) * ((int)vertex.y - (int)line1.y) / ((int)line2.y - (int)line1.y);
}
}
// Evaluates the stencil test for the given stencil value: both the value and
// the reference are masked with the stencil test mask and then compared
// using the current stencil test function.
// Returns false for a comparison function that is not handled below.
static inline bool StencilTestPassed(u8 stencil)
{
	// TODO: Does the masking logic make any sense?
	stencil &= gstate.getStencilTestMask();
	u8 ref = gstate.getStencilTestRef() & gstate.getStencilTestMask();

	switch (gstate.getStencilTestFunction()) {
	case GE_COMP_NEVER:
		return false;

	case GE_COMP_ALWAYS:
		return true;

	case GE_COMP_EQUAL:
		return (stencil == ref);

	case GE_COMP_NOTEQUAL:
		return (stencil != ref);

	case GE_COMP_LESS:
		return (stencil < ref);

	case GE_COMP_LEQUAL:
		return (stencil <= ref);

	case GE_COMP_GREATER:
		return (stencil > ref);

	case GE_COMP_GEQUAL:
		return (stencil >= ref);

	default:
		// Never fall off the end of a non-void function.
		return false;
	}
}
static inline void ApplyStencilOp(int op, int x, int y)
{
u8 old_stencil = GetPixelStencil(x, y); // TODO: Apply mask?
u8 reference_stencil = gstate.getStencilTestRef(); // TODO: Apply mask?
switch (op) {
case GE_STENCILOP_KEEP:
return;
case GE_STENCILOP_ZERO:
SetPixelStencil(x, y, 0);
return;
case GE_STENCILOP_REPLACE:
SetPixelStencil(x, y, reference_stencil);
break;
case GE_STENCILOP_INVERT:
SetPixelStencil(x, y, ~old_stencil);
break;
case GE_STENCILOP_INCR:
// TODO: Does this overflow?
if (old_stencil != 0xFF)
SetPixelStencil(x, y, old_stencil+1);
break;
case GE_STENCILOP_DECR:
// TODO: Does this underflow?
if (old_stencil != 0)
SetPixelStencil(x, y, old_stencil-1);
break;
}
}
// Combines the primitive color with the sampled texture color according to
// the current texture function (modulate, decal, blend, replace or add).
//
// prim_color_rgb / prim_color_a: interpolated primitive color and alpha.
// texcolor: sampled texel color.
// Returns the combined RGBA color. Bit 0x100 of texfunc selects whether the
// texture alpha takes part in the computation. For an unknown texture
// function an error is logged and the primitive color is returned unchanged.
static inline Vec4<int> GetTextureFunctionOutput(const Vec3<int>& prim_color_rgb, int prim_color_a, const Vec4<int>& texcolor)
{
	// Default to the primitive color so that the unknown-function path below
	// never returns uninitialized values.
	Vec3<int> out_rgb = prim_color_rgb;
	int out_a = prim_color_a;

	bool rgba = (gstate.texfunc & 0x100) != 0;

	switch (gstate.getTextureFunction()) {
	case GE_TEXFUNC_MODULATE:
		out_rgb = prim_color_rgb * texcolor.rgb() / 255;
		out_a = (rgba) ? (prim_color_a * texcolor.a() / 255) : prim_color_a;
		break;

	case GE_TEXFUNC_DECAL:
	{
		// Without texture alpha the texel fully replaces the primitive color.
		int t = (rgba) ? texcolor.a() : 255;
		int invt = (rgba) ? 255 - t : 0;
		out_rgb = (invt * prim_color_rgb + t * texcolor.rgb()) / 255;
		out_a = prim_color_a;
		break;
	}

	case GE_TEXFUNC_BLEND:
	{
		// Interpolates between primitive color and texture environment color,
		// using the texel color as the per-channel blend weight.
		const Vec3<int> const255(255, 255, 255);
		const Vec3<int> texenv(gstate.getTextureEnvColR(), gstate.getTextureEnvColG(), gstate.getTextureEnvColB());
		out_rgb = ((const255 - texcolor.rgb()) * prim_color_rgb + texcolor.rgb() * texenv) / 255;
		out_a = prim_color_a * ((rgba) ? texcolor.a() : 255) / 255;
		break;
	}

	case GE_TEXFUNC_REPLACE:
		out_rgb = texcolor.rgb();
		out_a = (rgba) ? texcolor.a() : prim_color_a;
		break;

	case GE_TEXFUNC_ADD:
		out_rgb = prim_color_rgb + texcolor.rgb();
		if (out_rgb.r() > 255) out_rgb.r() = 255;
		if (out_rgb.g() > 255) out_rgb.g() = 255;
		if (out_rgb.b() > 255) out_rgb.b() = 255;
		out_a = prim_color_a * ((rgba) ? texcolor.a() : 255) / 255;
		break;

	default:
		ERROR_LOG(G3D, "Unknown texture function %x", gstate.getTextureFunction());
	}

	return Vec4<int>(out_rgb.r(), out_rgb.g(), out_rgb.b(), out_a);
}
// Evaluates the color test: the color and the reference color are both
// masked with the low 24 bits of the color mask register and then compared
// using the function selected by the low two bits of the colortest register.
static inline bool ColorTestPassed(Vec3<int> color)
{
	u32 mask = gstate.colormask&0xFFFFFF;
	color = Vec3<int>::FromRGB(color.ToRGB() & mask);
	Vec3<int> ref = Vec3<int>::FromRGB(gstate.colorref & mask);
	switch (gstate.colortest & 0x3) {
	case GE_COMP_NEVER:
		return false;

	case GE_COMP_ALWAYS:
		return true;

	case GE_COMP_EQUAL:
		return (color.r() == ref.r() && color.g() == ref.g() && color.b() == ref.b());

	case GE_COMP_NOTEQUAL:
		return (color.r() != ref.r() || color.g() != ref.g() || color.b() != ref.b());

	default:
		// Never fall off the end of a non-void function.
		return false;
	}
}
// Evaluates the alpha test. The alphatest register supplies the comparison
// function (bits 0-2), the reference value (bits 8-15) and the mask
// (bits 16-23); both alpha and reference are masked before the comparison.
static inline bool AlphaTestPassed(int alpha)
{
	u8 mask = (gstate.alphatest >> 16) & 0xFF;
	u8 ref = (gstate.alphatest >> 8) & mask;
	alpha &= mask;

	switch (gstate.alphatest & 0x7) {
	case GE_COMP_NEVER:
		return false;

	case GE_COMP_ALWAYS:
		return true;

	case GE_COMP_EQUAL:
		return (alpha == ref);

	case GE_COMP_NOTEQUAL:
		return (alpha != ref);

	case GE_COMP_LESS:
		return (alpha < ref);

	case GE_COMP_LEQUAL:
		return (alpha <= ref);

	case GE_COMP_GREATER:
		return (alpha > ref);

	case GE_COMP_GEQUAL:
		return (alpha >= ref);

	default:
		// Never fall off the end of a non-void function.
		return false;
	}
}
// Returns the per-channel factor the source color is multiplied with during
// alpha blending, as selected by blend function A. Unknown selections log an
// error and return a default-constructed vector.
static inline Vec3<int> GetSourceFactor(int source_a, const Vec4<int>& dst)
{
	// Most modes use the same scalar for all three channels; those set
	// `uniform` and share the return statement at the end.
	int uniform = 0;

	switch (gstate.getBlendFuncA()) {
	case GE_SRCBLEND_DSTCOLOR:
		return dst.rgb();

	case GE_SRCBLEND_INVDSTCOLOR:
		return Vec3<int>::AssignToAll(255) - dst.rgb();

	case GE_SRCBLEND_FIXA:
		return Vec4<int>::FromRGBA(gstate.getFixA()).rgb();

	case GE_SRCBLEND_SRCALPHA:
		uniform = source_a;
		break;

	case GE_SRCBLEND_INVSRCALPHA:
		uniform = 255 - source_a;
		break;

	case GE_SRCBLEND_DSTALPHA:
		uniform = dst.a();
		break;

	case GE_SRCBLEND_INVDSTALPHA:
		uniform = 255 - dst.a();
		break;

	case GE_SRCBLEND_DOUBLESRCALPHA:
		uniform = 2 * source_a;
		break;

	case GE_SRCBLEND_DOUBLEINVSRCALPHA:
		uniform = 255 - 2 * source_a;
		break;

	case GE_SRCBLEND_DOUBLEDSTALPHA:
		uniform = 2 * dst.a();
		break;

	case GE_SRCBLEND_DOUBLEINVDSTALPHA:
		// TODO: Clamping?
		uniform = 255 - 2 * dst.a();
		break;

	default:
		ERROR_LOG(G3D, "Unknown source factor %x", gstate.getBlendFuncA());
		return Vec3<int>();
	}

	return Vec3<int>::AssignToAll(uniform);
}
// Returns the per-channel factor the destination color is multiplied with
// during alpha blending, as selected by blend function B. Unknown selections
// log an error and return a default-constructed vector.
static inline Vec3<int> GetDestFactor(const Vec3<int>& source_rgb, int source_a, const Vec4<int>& dst)
{
	// Most modes use the same scalar for all three channels; those set
	// `uniform` and share the return statement at the end.
	int uniform = 0;

	switch (gstate.getBlendFuncB()) {
	case GE_DSTBLEND_SRCCOLOR:
		return source_rgb;

	case GE_DSTBLEND_INVSRCCOLOR:
		return Vec3<int>::AssignToAll(255) - source_rgb;

	case GE_DSTBLEND_FIXB:
		return Vec4<int>::FromRGBA(gstate.getFixB()).rgb();

	case GE_DSTBLEND_SRCALPHA:
		uniform = source_a;
		break;

	case GE_DSTBLEND_INVSRCALPHA:
		uniform = 255 - source_a;
		break;

	case GE_DSTBLEND_DSTALPHA:
		uniform = dst.a();
		break;

	case GE_DSTBLEND_INVDSTALPHA:
		uniform = 255 - dst.a();
		break;

	case GE_DSTBLEND_DOUBLESRCALPHA:
		uniform = 2 * source_a;
		break;

	case GE_DSTBLEND_DOUBLEINVSRCALPHA:
		uniform = 255 - 2 * source_a;
		break;

	case GE_DSTBLEND_DOUBLEDSTALPHA:
		uniform = 2 * dst.a();
		break;

	case GE_DSTBLEND_DOUBLEINVDSTALPHA:
		uniform = 255 - 2 * dst.a();
		break;

	default:
		ERROR_LOG(G3D, "Unknown dest factor %x", gstate.getBlendFuncB());
		return Vec3<int>();
	}

	return Vec3<int>::AssignToAll(uniform);
}
// Blends the source color with the destination pixel `dst` using the current
// blend equation and returns the resulting RGB value (not clamped here).
// The source and destination factors are always evaluated, even for the
// equations (min, max, absolute difference) that do not use them.
// Unknown equations log an error and return a default-constructed vector.
static inline Vec3<int> AlphaBlendingResult(const Vec3<int>& source_rgb, int source_a, const Vec4<int> dst)
{
	Vec3<int> src_weight = GetSourceFactor(source_a, dst);
	Vec3<int> dst_weight = GetDestFactor(source_rgb, source_a, dst);

	Vec3<int> blended;
	switch (gstate.getBlendEq()) {
	case GE_BLENDMODE_MUL_AND_ADD:
		blended = (source_rgb * src_weight + dst.rgb() * dst_weight) / 255;
		break;

	case GE_BLENDMODE_MUL_AND_SUBTRACT:
		blended = (source_rgb * src_weight - dst.rgb() * dst_weight) / 255;
		break;

	case GE_BLENDMODE_MUL_AND_SUBTRACT_REVERSE:
		blended = (dst.rgb() * dst_weight - source_rgb * src_weight) / 255;
		break;

	case GE_BLENDMODE_MIN:
		blended = Vec3<int>(std::min(source_rgb.r(), dst.r()),
							std::min(source_rgb.g(), dst.g()),
							std::min(source_rgb.b(), dst.b()));
		break;

	case GE_BLENDMODE_MAX:
		blended = Vec3<int>(std::max(source_rgb.r(), dst.r()),
							std::max(source_rgb.g(), dst.g()),
							std::max(source_rgb.b(), dst.b()));
		break;

	case GE_BLENDMODE_ABSDIFF:
		blended = Vec3<int>(::abs(source_rgb.r() - dst.r()),
							::abs(source_rgb.g() - dst.g()),
							::abs(source_rgb.b() - dst.b()));
		break;

	default:
		ERROR_LOG(G3D, "Unknown blend function %x", gstate.getBlendEq());
		return Vec3<int>();
	}
	return blended;
}
// Draws triangle, vertices specified in counter-clockwise direction
//
// Rasterizes the triangle (v0, v1, v2) into the framebuffer using edge
// functions: the scissored bounding box is walked in steps of 16 screen units
// and every sample point that lies inside all three edges runs through the
// per-pixel pipeline below (color interpolation, texturing, depth range /
// color / alpha / stencil / depth tests, alpha blending, logic op, masking).
// Triangles whose cross product indicates the opposite winding are dropped.
// NOTE(review): the step size of 16 suggests screen coordinates carry 4
// fractional bits - confirm against TransformUnit.
void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& v2)
{
// Edge deltas between the vertices in screen space.
Vec2<int> d01((int)v0.screenpos.x - (int)v1.screenpos.x, (int)v0.screenpos.y - (int)v1.screenpos.y);
Vec2<int> d02((int)v0.screenpos.x - (int)v2.screenpos.x, (int)v0.screenpos.y - (int)v2.screenpos.y);
Vec2<int> d12((int)v1.screenpos.x - (int)v2.screenpos.x, (int)v1.screenpos.y - (int)v2.screenpos.y);
// Drop primitives which are not in CCW order by checking the cross product
if (d01.x * d02.y - d01.y * d02.x < 0)
return;
// Bounding box of the triangle, rounded down to multiples of 16 screen units.
int minX = std::min(std::min(v0.screenpos.x, v1.screenpos.x), v2.screenpos.x) / 16 * 16;
int minY = std::min(std::min(v0.screenpos.y, v1.screenpos.y), v2.screenpos.y) / 16 * 16;
int maxX = std::max(std::max(v0.screenpos.x, v1.screenpos.x), v2.screenpos.x) / 16 * 16;
int maxY = std::max(std::max(v0.screenpos.y, v1.screenpos.y), v2.screenpos.y) / 16 * 16;
// Restrict the bounding box to the scissor rectangle (converted to screen coordinates).
DrawingCoords scissorTL(gstate.getScissorX1(), gstate.getScissorY1(), 0);
DrawingCoords scissorBR(gstate.getScissorX2(), gstate.getScissorY2(), 0);
minX = std::max(minX, (int)TransformUnit::DrawingToScreen(scissorTL).x);
maxX = std::min(maxX, (int)TransformUnit::DrawingToScreen(scissorBR).x);
minY = std::max(minY, (int)TransformUnit::DrawingToScreen(scissorTL).y);
maxY = std::min(maxY, (int)TransformUnit::DrawingToScreen(scissorBR).y);
// Edges classified as right side / flat bottom get a bias of -1, so sample
// points lying exactly on them (edge function == 0) fail the inside test below.
int bias0 = IsRightSideOrFlatBottomLine(v0.screenpos.xy(), v1.screenpos.xy(), v2.screenpos.xy()) ? -1 : 0;
int bias1 = IsRightSideOrFlatBottomLine(v1.screenpos.xy(), v2.screenpos.xy(), v0.screenpos.xy()) ? -1 : 0;
int bias2 = IsRightSideOrFlatBottomLine(v2.screenpos.xy(), v0.screenpos.xy(), v1.screenpos.xy()) ? -1 : 0;
// Edge function values at the top-left corner of the bounding box; they are
// updated incrementally while stepping through rows and columns.
ScreenCoords pprime(minX, minY, 0);
int w0_base = orient2d(v1.screenpos, v2.screenpos, pprime);
int w1_base = orient2d(v2.screenpos, v0.screenpos, pprime);
int w2_base = orient2d(v0.screenpos, v1.screenpos, pprime);
for (pprime.y = minY; pprime.y <= maxY; pprime.y +=16,
w0_base += orient2dIncY(d12.x)*16,
w1_base += orient2dIncY(-d02.x)*16,
w2_base += orient2dIncY(d01.x)*16) {
int w0 = w0_base;
int w1 = w1_base;
int w2 = w2_base;
for (pprime.x = minX; pprime.x <= maxX; pprime.x +=16,
w0 += orient2dIncX(d12.y)*16,
w1 += orient2dIncX(-d02.y)*16,
w2 += orient2dIncX(d01.y)*16) {
DrawingCoords p = TransformUnit::ScreenToDrawing(pprime);
// If p is on or inside all edges, render pixel
// TODO: Should we render if the pixel is both on the left and the right side? (i.e. degenerated triangle)
if (w0 + bias0 >=0 && w1 + bias1 >= 0 && w2 + bias2 >= 0) {
// TODO: Check if this check is still necessary
// All weights being zero would make the divisions by (w0+w1+w2) below divide by zero.
if (w0 == w1 && w1 == w2 && w2 == 0)
continue;
Vec3<int> prim_color_rgb(0, 0, 0);
int prim_color_a = 0;
Vec3<int> sec_color(0, 0, 0);
// Gouraud shading interpolates the vertex colors with the edge weights;
// otherwise the colors of the last vertex (v2) are used for the whole triangle.
if ((gstate.shademodel&1) == GE_SHADE_GOURAUD) {
// NOTE: When not casting color0 and color1 to float vectors, this code suffers from severe overflow issues.
// Not sure if that should be regarded as a bug or if casting to float is a valid fix.
// TODO: Is that the correct way to interpolate?
prim_color_rgb = ((v0.color0.rgb().Cast<float>() * w0 +
v1.color0.rgb().Cast<float>() * w1 +
v2.color0.rgb().Cast<float>() * w2) / (w0+w1+w2)).Cast<int>();
prim_color_a = (int)(((float)v0.color0.a() * w0 + (float)v1.color0.a() * w1 + (float)v2.color0.a() * w2) / (w0+w1+w2));
sec_color = ((v0.color1.Cast<float>() * w0 +
v1.color1.Cast<float>() * w1 +
v2.color1.Cast<float>() * w2) / (w0+w1+w2)).Cast<int>();
} else {
prim_color_rgb = v2.color0.rgb();
prim_color_a = v2.color0.a();
sec_color = v2.color1;
}
// Texturing: through mode interpolates the texture coordinates linearly and
// uses them directly as texel coordinates; otherwise they go through
// GetTextureCoordinates / GetTexelCoordinates. Only mip level 0 is sampled.
if (gstate.isTextureMapEnabled() && !gstate.isModeClear()) {
unsigned int u = 0, v = 0;
if (gstate.isModeThrough()) {
// TODO: Is it really this simple?
u = (v0.texturecoords.s() * w0 + v1.texturecoords.s() * w1 + v2.texturecoords.s() * w2) / (w0+w1+w2);
v = (v0.texturecoords.t() * w0 + v1.texturecoords.t() * w1 + v2.texturecoords.t() * w2) / (w0+w1+w2);
} else {
float s = 0, t = 0;
GetTextureCoordinates(v0, v1, v2, w0, w1, w2, s, t);
GetTexelCoordinates(0, s, t, u, v);
}
Vec4<int> texcolor = Vec4<int>::FromRGBA(SampleNearest(0, u, v));
Vec4<int> out = GetTextureFunctionOutput(prim_color_rgb, prim_color_a, texcolor);
prim_color_rgb = out.rgb();
prim_color_a = out.a();
}
if (gstate.isColorDoublingEnabled()) {
// TODO: Do we need to clamp here?
prim_color_rgb *= 2;
sec_color *= 2;
}
// The secondary color is added on top of the (textured) primary color.
prim_color_rgb += sec_color;
// TODO: Fogging
// TODO: Is that the correct way to interpolate?
u16 z = (u16)(((float)v0.screenpos.z * w0 + (float)v1.screenpos.z * w1 + (float)v2.screenpos.z * w2) / (w0+w1+w2));
// Depth range test
if (!gstate.isModeThrough())
if (z < gstate.getDepthRangeMin() || z > gstate.getDepthRangeMax())
continue;
// Color, alpha and stencil tests are skipped in clear mode.
if (gstate.isColorTestEnabled() && !gstate.isModeClear())
if (!ColorTestPassed(prim_color_rgb))
continue;
if (gstate.isAlphaTestEnabled() && !gstate.isModeClear())
if (!AlphaTestPassed(prim_color_a))
continue;
if (gstate.isStencilTestEnabled() && !gstate.isModeClear()) {
u8 stencil = GetPixelStencil(p.x, p.y);
if (!StencilTestPassed(stencil)) {
ApplyStencilOp(gstate.getStencilOpSFail(), p.x, p.y);
continue;
}
}
// TODO: Is it safe to ignore gstate.isDepthTestEnabled() when clear mode is enabled?
if ((gstate.isDepthTestEnabled() && !gstate.isModeThrough()) || gstate.isModeClear()) {
// TODO: Verify that stencil op indeed needs to be applied here even if stencil testing is disabled
if (!DepthTestPassed(p.x, p.y, z)) {
ApplyStencilOp(gstate.getStencilOpZFail(), p.x, p.y);
continue;
} else {
ApplyStencilOp(gstate.getStencilOpZPass(), p.x, p.y);
}
// Depth writes are controlled by separate enable flags in clear mode and normal mode.
if (gstate.isModeClear() && gstate.isClearModeDepthWriteEnabled())
SetPixelDepth(p.x, p.y, z);
else if (!gstate.isModeClear() && gstate.isDepthWriteEnabled())
SetPixelDepth(p.x, p.y, z);
}
if (gstate.isAlphaBlendEnabled() && !gstate.isModeClear()) {
Vec4<int> dst = Vec4<int>::FromRGBA(GetPixelColor(p.x, p.y));
prim_color_rgb = AlphaBlendingResult(prim_color_rgb, prim_color_a, dst);
}
// Clamp all channels to [0, 255] before packing them into a 32-bit color.
if (prim_color_rgb.r() > 255) prim_color_rgb.r() = 255;
if (prim_color_rgb.g() > 255) prim_color_rgb.g() = 255;
if (prim_color_rgb.b() > 255) prim_color_rgb.b() = 255;
if (prim_color_a > 255) prim_color_a = 255;
if (prim_color_rgb.r() < 0) prim_color_rgb.r() = 0;
if (prim_color_rgb.g() < 0) prim_color_rgb.g() = 0;
if (prim_color_rgb.b() < 0) prim_color_rgb.b() = 0;
if (prim_color_a < 0) prim_color_a = 0;
u32 new_color = Vec4<int>(prim_color_rgb.r(), prim_color_rgb.g(), prim_color_rgb.b(), prim_color_a).ToRGBA();
u32 old_color = GetPixelColor(p.x, p.y);
// TODO: Is alpha blending still performed if logic ops are enabled?
// Logic ops combine the new color with the color already in the framebuffer bit by bit.
if (gstate.isLogicOpEnabled() && !gstate.isModeClear()) {
switch (gstate.getLogicOp()) {
case GE_LOGIC_CLEAR:
new_color = 0;
break;
case GE_LOGIC_AND:
new_color = new_color & old_color;
break;
case GE_LOGIC_AND_REVERSE:
new_color = new_color & ~old_color;
break;
case GE_LOGIC_COPY:
//new_color = new_color;
break;
case GE_LOGIC_AND_INVERTED:
new_color = ~new_color & old_color;
break;
case GE_LOGIC_NOOP:
new_color = old_color;
break;
case GE_LOGIC_XOR:
new_color = new_color ^ old_color;
break;
case GE_LOGIC_OR:
new_color = new_color | old_color;
break;
case GE_LOGIC_NOR:
new_color = ~(new_color | old_color);
break;
case GE_LOGIC_EQUIV:
new_color = ~(new_color ^ old_color);
break;
case GE_LOGIC_INVERTED:
new_color = ~old_color;
break;
case GE_LOGIC_OR_REVERSE:
new_color = new_color | ~old_color;
break;
case GE_LOGIC_COPY_INVERTED:
new_color = ~new_color;
break;
case GE_LOGIC_OR_INVERTED:
new_color = ~new_color | old_color;
break;
case GE_LOGIC_NAND:
new_color = ~(new_color & old_color);
break;
case GE_LOGIC_SET:
new_color = 0xFFFFFFFF;
break;
}
}
// Write masking: in clear mode set bits of the clear-mode mask select what is
// written; in normal mode set bits of the color mask keep the old framebuffer value.
if (gstate.isModeClear()) {
new_color = (new_color & gstate.getClearModeColorMask()) | (old_color & ~gstate.getClearModeColorMask());
} else {
new_color = (new_color & ~gstate.getColorMask()) | (old_color & gstate.getColorMask());
}
SetPixelColor(p.x, p.y, new_color);
}
}
}
}
} // namespace

27
GPU/Software/Rasterizer.h Normal file

@ -0,0 +1,27 @@
// Copyright (c) 2013- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#pragma once
#include "TransformUnit.h" // for DrawingCoords
// Public interface of the software rasterizer.
namespace Rasterizer {
// Draws a triangle if its vertices are specified in counter-clockwise order
// v0, v1, v2: the three vertices of the triangle to rasterize.
void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& v2);
}

915
GPU/Software/SoftGpu.cpp Normal file

@ -0,0 +1,915 @@
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include "../GPUState.h"
#include "../ge_constants.h"
#include "../../Core/MemMap.h"
#include "../../Core/HLE/sceKernelInterrupt.h"
#include "../../Core/HLE/sceGe.h"
#include "gfx/gl_common.h"
#include "SoftGpu.h"
#include "TransformUnit.h"
#include "Colors.h"
// GL texture that receives the framebuffer contents in CopyToCurrentFboFromRam;
// created in the SoftGPU constructor and deleted in its destructor.
static GLuint temp_texture = 0;
// Attribute locations of "pos" and "TexCoordIn" in the blit shader program.
static GLint attr_pos = -1, attr_tex = -1;
// Uniform location of the "Texture" sampler in the blit shader program.
static GLint uni_tex = -1;
// Shader program used to draw the framebuffer texture; compiled in the SoftGPU constructor.
static GLuint program;
// Source height passed to CopyToCurrentFboFromRam by CopyDisplayToOutput.
const int FB_HEIGHT = 272;
// Pointer to the current framebuffer memory; set in the SoftGPU constructor
// and updated by the framebuffer pointer/width commands in ExecuteOp.
u8* fb = NULL;
// Pointer to the current depth buffer memory; set in the SoftGPU constructor.
u8* depthbuf = NULL;
// Storage for the color lookup table (4096 32-bit entries).
// NOTE(review): presumably read by the rasterizer's palette lookup - confirm.
u32 clut[4096];
// Compiles the given vertex and fragment shader sources and links them into
// a new GL program.
//
// vertexShader / fragmentShader: null-terminated GLSL source strings.
// Returns the ID of the created program object. The shader objects are
// flagged for deletion before returning.
//
// In debug builds (_DEBUG, DEBUGFAST or DEBUG_GLSL) the compile and link
// status is queried and the info logs are written to the G3D log. In other
// builds no status is checked, so the returned program may be unusable if
// compilation or linking failed.
GLuint OpenGL_CompileProgram(const char* vertexShader, const char* fragmentShader)
{
	// generate objects
	GLuint vertexShaderID = glCreateShader(GL_VERTEX_SHADER);
	GLuint fragmentShaderID = glCreateShader(GL_FRAGMENT_SHADER);
	GLuint programID = glCreateProgram();

	// compile vertex shader
	glShaderSource(vertexShaderID, 1, &vertexShader, NULL);
	glCompileShader(vertexShaderID);

#if defined(_DEBUG) || defined(DEBUGFAST) || defined(DEBUG_GLSL)
	GLint Result = GL_FALSE;
	char stringBuffer[1024];
	GLsizei stringBufferUsage = 0;

	// Make sure the buffer is a valid string even if no log gets written.
	stringBuffer[0] = '\0';
	glGetShaderiv(vertexShaderID, GL_COMPILE_STATUS, &Result);
	glGetShaderInfoLog(vertexShaderID, 1024, &stringBufferUsage, stringBuffer);
	if (!Result) {
		ERROR_LOG(G3D, "Failed to compile vertex shader: %s", stringBuffer);
	} else if (stringBufferUsage) {
		DEBUG_LOG(G3D, "Vertex shader compile log: %s", stringBuffer);
	}

	bool shader_errors = !Result;
#endif

	// compile fragment shader
	glShaderSource(fragmentShaderID, 1, &fragmentShader, NULL);
	glCompileShader(fragmentShaderID);

#if defined(_DEBUG) || defined(DEBUGFAST) || defined(DEBUG_GLSL)
	stringBuffer[0] = '\0';
	stringBufferUsage = 0;
	glGetShaderiv(fragmentShaderID, GL_COMPILE_STATUS, &Result);
	glGetShaderInfoLog(fragmentShaderID, 1024, &stringBufferUsage, stringBuffer);
	if (!Result) {
		ERROR_LOG(G3D, "Failed to compile fragment shader: %s", stringBuffer);
	} else if (stringBufferUsage) {
		DEBUG_LOG(G3D, "Fragment shader compile log: %s", stringBuffer);
	}

	shader_errors |= !Result;
#endif

	// link them
	glAttachShader(programID, vertexShaderID);
	glAttachShader(programID, fragmentShaderID);
	glLinkProgram(programID);

#if defined(_DEBUG) || defined(DEBUGFAST) || defined(DEBUG_GLSL)
	stringBuffer[0] = '\0';
	stringBufferUsage = 0;
	glGetProgramiv(programID, GL_LINK_STATUS, &Result);
	glGetProgramInfoLog(programID, 1024, &stringBufferUsage, stringBuffer);
	if (Result && stringBufferUsage) {
		DEBUG_LOG(G3D, "Shader program link log: %s", stringBuffer);
	} else if (!Result && !shader_errors) {
		// Link failures caused by a shader that did not compile were already reported above.
		ERROR_LOG(G3D, "Failed to link shader program: %s", stringBuffer);
	}
#endif

	// cleanup
	glDeleteShader(vertexShaderID);
	glDeleteShader(fragmentShaderID);

	return programID;
}
// Sets up the GL resources used to present the software-rendered framebuffer
// (a temporary texture and a pass-through shader program) and points the
// framebuffer and depth buffer at their default memory location.
SoftGPU::SoftGPU()
{
glClearColor(0.0f, 0.0f, 0.0f, 0.0f);
glPixelStorei(GL_UNPACK_ALIGNMENT, 4); // 4-byte pixel alignment
glGenTextures(1, &temp_texture);
// TODO: Use highp for GLES
// Fragment shader: outputs the sampled texture color unmodified.
static const char *fragShaderText =
"varying vec2 TexCoordOut;\n"
"uniform sampler2D Texture;\n"
"void main() {\n"
" vec4 tmpcolor;\n"
" tmpcolor = texture2D(Texture, TexCoordOut);\n"
" gl_FragColor = tmpcolor;\n"
"}\n";
// Vertex shader: passes position and texture coordinate through unchanged.
static const char *vertShaderText =
"attribute vec4 pos;\n"
"attribute vec2 TexCoordIn;\n "
"varying vec2 TexCoordOut;\n "
"void main() {\n"
" gl_Position = pos;\n"
" TexCoordOut = TexCoordIn;\n"
"}\n";
program = OpenGL_CompileProgram(vertShaderText, fragShaderText);
glUseProgram(program);
// Cache the uniform/attribute locations used when drawing in CopyToCurrentFboFromRam.
uni_tex = glGetUniformLocation(program, "Texture");
attr_pos = glGetAttribLocation(program, "pos");
attr_tex = glGetAttribLocation(program, "TexCoordIn");
// Both buffers start out at the same address until the game configures them.
fb = Memory::GetPointer(0x44000000); // TODO: correct default address?
depthbuf = Memory::GetPointer(0x44000000); // TODO: correct default address?
}
// Releases the GL program and the temporary texture created in the constructor.
SoftGPU::~SoftGPU()
{
glDeleteProgram(program);
glDeleteTextures(1, &temp_texture);
}
// Copies framebuffer data from RAM to the currently bound render target.
//
// data:                 source pixels in the current framebuffer format
//                       (gstate.FrameBufFormat()).
// srcwidth, srcheight:  size of the source image in pixels.
// dstwidth, dstheight:  size of the destination viewport in pixels.
//
// 8888 data is uploaded directly. 16-bit formats are converted to RGBA8888
// on the CPU first, reading rows with the framebuffer stride from the GE
// state. The image is then drawn as a full-viewport textured quad.
void CopyToCurrentFboFromRam(u8* data, int srcwidth, int srcheight, int dstwidth, int dstheight)
{
	glDisable(GL_BLEND);
	glViewport(0, 0, dstwidth, dstheight);
	glScissor(0, 0, dstwidth, dstheight);

	// Select the texture unit first so that the texture is bound to the unit
	// the sampler uniform (set to 0 below) actually reads from.
	glActiveTexture(GL_TEXTURE0);
	glBindTexture(GL_TEXTURE_2D, temp_texture);

	if (gstate.FrameBufFormat() == GE_FORMAT_8888) {
		glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, (GLsizei)srcwidth, (GLsizei)srcheight, 0, GL_RGBA, GL_UNSIGNED_BYTE, data);
	} else {
		// TODO: This should probably be converted in a shader instead..
		// TODO: Do something less brain damaged to manage this buffer...
		// Zero-initialized so that an unexpected format yields black pixels
		// instead of uninitialized memory.
		u32* buf = new u32[srcwidth*srcheight]();

		// The format cannot change while converting, so evaluate it once.
		const bool is565 = gstate.FrameBufFormat() == GE_FORMAT_565;
		const bool is5551 = gstate.FrameBufFormat() == GE_FORMAT_5551;
		const bool is4444 = gstate.FrameBufFormat() == GE_FORMAT_4444;

		for (int y = 0; y < srcheight; ++y) {
			for (int x = 0; x < srcwidth; ++x) {
				// Read from the buffer passed by the caller, not the global framebuffer pointer.
				u16 src = *(u16*)&data[2*x + 2*y*gstate.FrameBufStride()];
				if (is565)
					buf[x+y*srcwidth] = DecodeRGB565(src);
				else if (is5551)
					buf[x+y*srcwidth] = DecodeRGBA5551(src);
				else if (is4444)
					buf[x+y*srcwidth] = DecodeRGBA4444(src);
			}
		}
		glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, (GLsizei)srcwidth, (GLsizei)srcheight, 0, GL_RGBA, GL_UNSIGNED_BYTE, buf);
		delete[] buf;
	}
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);

	glUseProgram(program);

	// Full-viewport quad. The texture's V axis is flipped relative to the
	// vertex Y axis, so the first row of the source image ends up at the top.
	static const GLfloat verts[4][2] = {
		{ -1, -1}, // Left top
		{ -1, 1}, // left bottom
		{ 1, 1}, // right bottom
		{ 1, -1} // right top
	};
	static const GLfloat texverts[4][2] = {
		{0, 1},
		{0, 0},
		{1, 0},
		{1, 1}
	};

	glVertexAttribPointer(attr_pos, 2, GL_FLOAT, GL_FALSE, 0, verts);
	glVertexAttribPointer(attr_tex, 2, GL_FLOAT, GL_FALSE, 0, texverts);
	glEnableVertexAttribArray(attr_pos);
	glEnableVertexAttribArray(attr_tex);
	glUniform1i(uni_tex, 0);
	glDrawArrays(GL_TRIANGLE_FAN, 0, 4);
	glDisableVertexAttribArray(attr_pos);
	glDisableVertexAttribArray(attr_tex);

	glBindTexture(GL_TEXTURE_2D, 0);
}
// Presents the current framebuffer: uploads it from RAM and draws it to the
// currently bound render target at the configured render resolution.
// The source width is taken from the framebuffer width register and the
// source height is fixed to FB_HEIGHT.
void SoftGPU::CopyDisplayToOutput()
{
// TODO: How to get the correct dimensions?
CopyToCurrentFboFromRam(fb, gstate.fbwidth & 0x3C0, FB_HEIGHT, PSP_CoreParameter().renderWidth, PSP_CoreParameter().renderHeight);
}
// Handles a draw sync request. For mode 0 one pending interrupt is run first;
// in all cases the request is then forwarded to GPUCommon::DrawSync, whose
// result is returned.
u32 SoftGPU::DrawSync(int mode)
{
if (mode == 0) // Wait for completion
{
__RunOnePendingInterrupt();
}
return GPUCommon::DrawSync(mode);
}
// Executes display list commands starting at list.pc until the downcount
// budget is used up. Each command word is stored in the command memory and
// executed together with the bits that differ from the previously stored word.
void SoftGPU::FastRunLoop(DisplayList &list) {
	while (downcount > 0) {
		const u32 op = Memory::ReadUnchecked_U32(list.pc);
		// The top byte of the command word selects the command.
		const u32 cmd = op >> 24;
		const u32 changed_bits = op ^ gstate.cmdmem[cmd];

		gstate.cmdmem[cmd] = op;
		ExecuteOp(op, changed_bits);

		list.pc += 4;
		--downcount;
	}
}
void SoftGPU::ExecuteOp(u32 op, u32 diff)
{
u32 cmd = op >> 24;
u32 data = op & 0xFFFFFF;
// Handle control and drawing commands here directly. The others we delegate.
switch (cmd)
{
case GE_CMD_BASE:
DEBUG_LOG(G3D,"DL BASE: %06x", data);
break;
case GE_CMD_VADDR: /// <<8????
gstate_c.vertexAddr = ((gstate.base & 0x00FF0000) << 8)|data;
DEBUG_LOG(G3D,"DL VADDR: %06x", gstate_c.vertexAddr);
break;
case GE_CMD_IADDR:
gstate_c.indexAddr = ((gstate.base & 0x00FF0000) << 8)|data;
DEBUG_LOG(G3D,"DL IADDR: %06x", gstate_c.indexAddr);
break;
case GE_CMD_PRIM:
{
u32 count = data & 0xFFFF;
u32 type = data >> 16;
static const char* types[7] = {
"POINTS=0,",
"LINES=1,",
"LINE_STRIP=2,",
"TRIANGLES=3,",
"TRIANGLE_STRIP=4,",
"TRIANGLE_FAN=5,",
"RECTANGLES=6,",
};
if (type != GE_PRIM_TRIANGLES && type != GE_PRIM_TRIANGLE_STRIP && type != GE_PRIM_TRIANGLE_FAN && type != GE_PRIM_RECTANGLES) {
ERROR_LOG(G3D, "DL DrawPrim type: %s count: %i vaddr= %08x, iaddr= %08x", type<7 ? types[type] : "INVALID", count, gstate_c.vertexAddr, gstate_c.indexAddr);
break;
}
if (!Memory::IsValidAddress(gstate_c.vertexAddr)) {
ERROR_LOG(G3D, "Bad vertex address %08x!", gstate_c.vertexAddr);
break;
}
void *verts = Memory::GetPointer(gstate_c.vertexAddr);
void *indices = NULL;
if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) {
if (!Memory::IsValidAddress(gstate_c.indexAddr)) {
ERROR_LOG(G3D, "Bad index address %08x!", gstate_c.indexAddr);
break;
}
indices = Memory::GetPointer(gstate_c.indexAddr);
}
TransformUnit::SubmitPrimitive(verts, indices, type, count, gstate.vertType);
}
break;
// The arrow and other rotary items in Puzbob are bezier patches, strangely enough.
case GE_CMD_BEZIER:
{
int bz_ucount = data & 0xFF;
int bz_vcount = (data >> 8) & 0xFF;
DEBUG_LOG(G3D,"DL DRAW BEZIER: %i x %i", bz_ucount, bz_vcount);
}
break;
case GE_CMD_SPLINE:
{
int sp_ucount = data & 0xFF;
int sp_vcount = (data >> 8) & 0xFF;
int sp_utype = (data >> 16) & 0x3;
int sp_vtype = (data >> 18) & 0x3;
if (!Memory::IsValidAddress(gstate_c.vertexAddr)) {
ERROR_LOG(G3D, "Bad vertex address %08x!", gstate_c.vertexAddr);
break;
}
void *control_points = Memory::GetPointer(gstate_c.vertexAddr);
void *indices = NULL;
if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) {
if (!Memory::IsValidAddress(gstate_c.indexAddr)) {
ERROR_LOG(G3D, "Bad index address %08x!", gstate_c.indexAddr);
break;
}
indices = Memory::GetPointer(gstate_c.indexAddr);
}
if (gstate.getPatchPrimitiveType() != GE_PATCHPRIM_TRIANGLES) {
ERROR_LOG(G3D, "Unsupported patch primitive %x", gstate.patchprimitive&3);
break;
}
TransformUnit::SubmitSpline(control_points, indices, sp_ucount, sp_vcount, sp_utype, sp_vtype, gstate.patchprimitive&3, gstate.vertType);
DEBUG_LOG(G3D,"DL DRAW SPLINE: %i x %i, %i x %i", sp_ucount, sp_vcount, sp_utype, sp_vtype);
}
break;
case GE_CMD_BJUMP:
// bounding box jump. Let's just not jump, for now.
DEBUG_LOG(G3D,"DL BBOX JUMP - unimplemented");
break;
case GE_CMD_BOUNDINGBOX:
// bounding box test. Let's do nothing.
DEBUG_LOG(G3D,"DL BBOX TEST - unimplemented");
break;
case GE_CMD_VERTEXTYPE:
DEBUG_LOG(G3D,"DL SetVertexType: %06x", data);
// This sets through-mode or not, as well.
break;
case GE_CMD_REGION1:
{
int x1 = data & 0x3ff;
int y1 = data >> 10;
//topleft
DEBUG_LOG(G3D,"DL Region TL: %d %d", x1, y1);
}
break;
case GE_CMD_REGION2:
{
int x2 = data & 0x3ff;
int y2 = data >> 10;
DEBUG_LOG(G3D,"DL Region BR: %d %d", x2, y2);
}
break;
case GE_CMD_CLIPENABLE:
DEBUG_LOG(G3D, "DL Clip Enable: %i (ignoring)", data);
break;
case GE_CMD_CULLFACEENABLE:
DEBUG_LOG(G3D, "DL CullFace Enable: %i (ignoring)", data);
break;
case GE_CMD_TEXTUREMAPENABLE:
DEBUG_LOG(G3D, "DL Texture map enable: %i", data);
break;
case GE_CMD_LIGHTINGENABLE:
DEBUG_LOG(G3D, "DL Lighting enable: %i", data);
break;
case GE_CMD_FOGENABLE:
DEBUG_LOG(G3D, "DL Fog Enable: %i", gstate.fogEnable);
break;
case GE_CMD_DITHERENABLE:
DEBUG_LOG(G3D, "DL Dither Enable: %i", gstate.ditherEnable);
break;
case GE_CMD_OFFSETX:
DEBUG_LOG(G3D, "DL Offset X: %i", gstate.offsetx);
break;
case GE_CMD_OFFSETY:
DEBUG_LOG(G3D, "DL Offset Y: %i", gstate.offsety);
break;
case GE_CMD_TEXSCALEU:
gstate_c.uv.uScale = getFloat24(data);
DEBUG_LOG(G3D, "DL Texture U Scale: %f", gstate_c.uv.uScale);
break;
case GE_CMD_TEXSCALEV:
gstate_c.uv.vScale = getFloat24(data);
DEBUG_LOG(G3D, "DL Texture V Scale: %f", gstate_c.uv.vScale);
break;
case GE_CMD_TEXOFFSETU:
gstate_c.uv.uOff = getFloat24(data);
DEBUG_LOG(G3D, "DL Texture U Offset: %f", gstate_c.uv.uOff);
break;
case GE_CMD_TEXOFFSETV:
gstate_c.uv.vOff = getFloat24(data);
DEBUG_LOG(G3D, "DL Texture V Offset: %f", gstate_c.uv.vOff);
break;
case GE_CMD_SCISSOR1:
{
int x1 = data & 0x3ff;
int y1 = data >> 10;
DEBUG_LOG(G3D, "DL Scissor TL: %i, %i", x1,y1);
}
break;
case GE_CMD_SCISSOR2:
{
int x2 = data & 0x3ff;
int y2 = data >> 10;
DEBUG_LOG(G3D, "DL Scissor BR: %i, %i", x2, y2);
}
break;
case GE_CMD_MINZ:
DEBUG_LOG(G3D, "DL MinZ: %i", data);
break;
case GE_CMD_MAXZ:
DEBUG_LOG(G3D, "DL MaxZ: %i", data);
break;
case GE_CMD_FRAMEBUFPTR:
{
u32 ptr = op & 0xFFE000;
fb = Memory::GetPointer(0x44000000 | (gstate.fbptr & 0xFFE000) | ((gstate.fbwidth & 0xFF0000) << 8));
DEBUG_LOG(G3D, "DL FramebufPtr: %08x", ptr);
}
break;
case GE_CMD_FRAMEBUFWIDTH:
{
u32 w = data & 0xFFFFFF;
fb = Memory::GetPointer(0x44000000 | (gstate.fbptr & 0xFFE000) | ((gstate.fbwidth & 0xFF0000) << 8));
DEBUG_LOG(G3D, "DL FramebufWidth: %i", w);
}
break;
case GE_CMD_FRAMEBUFPIXFORMAT:
break;
case GE_CMD_TEXADDR0:
gstate_c.textureChanged=true;
case GE_CMD_TEXADDR1:
case GE_CMD_TEXADDR2:
case GE_CMD_TEXADDR3:
case GE_CMD_TEXADDR4:
case GE_CMD_TEXADDR5:
case GE_CMD_TEXADDR6:
case GE_CMD_TEXADDR7:
DEBUG_LOG(G3D,"DL Texture address %i: %06x", cmd-GE_CMD_TEXADDR0, data);
break;
case GE_CMD_TEXBUFWIDTH0:
gstate_c.textureChanged=true;
case GE_CMD_TEXBUFWIDTH1:
case GE_CMD_TEXBUFWIDTH2:
case GE_CMD_TEXBUFWIDTH3:
case GE_CMD_TEXBUFWIDTH4:
case GE_CMD_TEXBUFWIDTH5:
case GE_CMD_TEXBUFWIDTH6:
case GE_CMD_TEXBUFWIDTH7:
DEBUG_LOG(G3D,"DL Texture BUFWIDTHess %i: %06x", cmd-GE_CMD_TEXBUFWIDTH0, data);
break;
case GE_CMD_CLUTADDR:
//DEBUG_LOG(G3D,"CLUT base addr: %06x", data);
break;
case GE_CMD_CLUTADDRUPPER:
DEBUG_LOG(G3D,"DL CLUT addr: %08x", ((gstate.clutaddrupper & 0xFF0000)<<8) | (gstate.clutaddr & 0xFFFFFF));
break;
case GE_CMD_LOADCLUT:
{
u32 clutAddr = ((gstate.clutaddr & 0xFFFFF0) | ((gstate.clutaddrupper << 8) & 0xFF000000));
u32 clutTotalBytes_ = (gstate.loadclut & 0x3f) * 32;
if (Memory::IsValidAddress(clutAddr)) {
Memory::Memcpy(clut, clutAddr, clutTotalBytes_);
} else {
// TODO: Does this make any sense?
memset(clut, 0xFF, clutTotalBytes_);
}
if (clutAddr)
{
DEBUG_LOG(G3D,"DL Clut load: %08x", clutAddr);
}
else
{
DEBUG_LOG(G3D,"DL Empty Clut load");
}
}
break;
//case GE_CMD_TRANSFERSRC:
case GE_CMD_TRANSFERSRCW:
{
u32 xferSrc = gstate.transfersrc | ((data&0xFF0000)<<8);
u32 xferSrcW = gstate.transfersrcw & 1023;
DEBUG_LOG(G3D,"Block Transfer Src: %08x W: %i", xferSrc, xferSrcW);
break;
}
// case GE_CMD_TRANSFERDST:
case GE_CMD_TRANSFERDSTW:
{
u32 xferDst= gstate.transferdst | ((data&0xFF0000)<<8);
u32 xferDstW = gstate.transferdstw & 1023;
DEBUG_LOG(G3D,"Block Transfer Dest: %08x W: %i", xferDst, xferDstW);
break;
}
case GE_CMD_TRANSFERSRCPOS:
{
u32 x = (data & 1023)+1;
u32 y = ((data>>10) & 1023)+1;
DEBUG_LOG(G3D, "DL Block Transfer Src Rect TL: %i, %i", x, y);
break;
}
case GE_CMD_TRANSFERDSTPOS:
{
u32 x = (data & 1023)+1;
u32 y = ((data>>10) & 1023)+1;
DEBUG_LOG(G3D, "DL Block Transfer Dest Rect TL: %i, %i", x, y);
break;
}
case GE_CMD_TRANSFERSIZE:
{
u32 w = (data & 1023)+1;
u32 h = ((data>>10) & 1023)+1;
DEBUG_LOG(G3D, "DL Block Transfer Rect Size: %i x %i", w, h);
break;
}
case GE_CMD_TRANSFERSTART:
{
u32 srcBasePtr = (gstate.transfersrc & 0xFFFFF0) | ((gstate.transfersrcw & 0xFF0000) << 8);
u32 srcStride = gstate.transfersrcw & 0x3F8;
u32 dstBasePtr = (gstate.transferdst & 0xFFFFF0) | ((gstate.transferdstw & 0xFF0000) << 8);
u32 dstStride = gstate.transferdstw & 0x3F8;
int srcX = gstate.transfersrcpos & 0x3FF;
int srcY = (gstate.transfersrcpos >> 10) & 0x3FF;
int dstX = gstate.transferdstpos & 0x3FF;
int dstY = (gstate.transferdstpos >> 10) & 0x3FF;
int width = (gstate.transfersize & 0x3FF) + 1;
int height = ((gstate.transfersize >> 10) & 0x3FF) + 1;
int bpp = (gstate.transferstart & 1) ? 4 : 2;
for (int y = 0; y < height; y++) {
const u8 *src = Memory::GetPointer(srcBasePtr + ((y + srcY) * srcStride + srcX) * bpp);
u8 *dst = Memory::GetPointer(dstBasePtr + ((y + dstY) * dstStride + dstX) * bpp);
memcpy(dst, src, width * bpp);
}
DEBUG_LOG(G3D, "DL Texture Transfer Start: PixFormat %i", data);
break;
}
case GE_CMD_TEXSIZE0:
gstate_c.textureChanged=true;
gstate_c.curTextureWidth = 1 << (gstate.texsize[0] & 0xf);
gstate_c.curTextureHeight = 1 << ((gstate.texsize[0]>>8) & 0xf);
//fall thru - ignoring the mipmap sizes for now
case GE_CMD_TEXSIZE1:
case GE_CMD_TEXSIZE2:
case GE_CMD_TEXSIZE3:
case GE_CMD_TEXSIZE4:
case GE_CMD_TEXSIZE5:
case GE_CMD_TEXSIZE6:
case GE_CMD_TEXSIZE7:
DEBUG_LOG(G3D,"DL Texture Size: %06x", data);
break;
case GE_CMD_ZBUFPTR:
{
u32 ptr = op & 0xFFE000;
depthbuf = Memory::GetPointer(0x44000000 | (gstate.zbptr & 0xFFE000) | ((gstate.zbwidth & 0xFF0000) << 8));
DEBUG_LOG(G3D,"Zbuf Ptr: %06x", ptr);
}
break;
case GE_CMD_ZBUFWIDTH:
{
u32 w = data & 0xFFFFFF;
depthbuf = Memory::GetPointer(0x44000000 | (gstate.zbptr & 0xFFE000) | ((gstate.zbwidth & 0xFF0000) << 8));
DEBUG_LOG(G3D,"Zbuf Width: %i", w);
}
break;
case GE_CMD_AMBIENTCOLOR:
DEBUG_LOG(G3D,"DL Ambient Color: %06x", data);
break;
case GE_CMD_AMBIENTALPHA:
DEBUG_LOG(G3D,"DL Ambient Alpha: %06x", data);
break;
case GE_CMD_MATERIALAMBIENT:
DEBUG_LOG(G3D,"DL Material Ambient Color: %06x", data);
break;
case GE_CMD_MATERIALDIFFUSE:
DEBUG_LOG(G3D,"DL Material Diffuse Color: %06x", data);
break;
case GE_CMD_MATERIALEMISSIVE:
DEBUG_LOG(G3D,"DL Material Emissive Color: %06x", data);
break;
case GE_CMD_MATERIALSPECULAR:
DEBUG_LOG(G3D,"DL Material Specular Color: %06x", data);
break;
case GE_CMD_MATERIALALPHA:
DEBUG_LOG(G3D,"DL Material Alpha Color: %06x", data);
break;
case GE_CMD_MATERIALSPECULARCOEF:
DEBUG_LOG(G3D,"DL Material specular coef: %f", getFloat24(data));
break;
case GE_CMD_LIGHTTYPE0:
case GE_CMD_LIGHTTYPE1:
case GE_CMD_LIGHTTYPE2:
case GE_CMD_LIGHTTYPE3:
DEBUG_LOG(G3D,"DL Light %i type: %06x", cmd-GE_CMD_LIGHTTYPE0, data);
break;
case GE_CMD_LX0:case GE_CMD_LY0:case GE_CMD_LZ0:
case GE_CMD_LX1:case GE_CMD_LY1:case GE_CMD_LZ1:
case GE_CMD_LX2:case GE_CMD_LY2:case GE_CMD_LZ2:
case GE_CMD_LX3:case GE_CMD_LY3:case GE_CMD_LZ3:
{
int n = cmd - GE_CMD_LX0;
int l = n / 3;
int c = n % 3;
float val = getFloat24(data);
DEBUG_LOG(G3D,"DL Light %i %c pos: %f", l, c+'X', val);
gstate_c.lightpos[l][c] = val;
}
break;
case GE_CMD_LDX0:case GE_CMD_LDY0:case GE_CMD_LDZ0:
case GE_CMD_LDX1:case GE_CMD_LDY1:case GE_CMD_LDZ1:
case GE_CMD_LDX2:case GE_CMD_LDY2:case GE_CMD_LDZ2:
case GE_CMD_LDX3:case GE_CMD_LDY3:case GE_CMD_LDZ3:
{
int n = cmd - GE_CMD_LDX0;
int l = n / 3;
int c = n % 3;
float val = getFloat24(data);
DEBUG_LOG(G3D,"DL Light %i %c dir: %f", l, c+'X', val);
gstate_c.lightdir[l][c] = val;
}
break;
case GE_CMD_LKA0:case GE_CMD_LKB0:case GE_CMD_LKC0:
case GE_CMD_LKA1:case GE_CMD_LKB1:case GE_CMD_LKC1:
case GE_CMD_LKA2:case GE_CMD_LKB2:case GE_CMD_LKC2:
case GE_CMD_LKA3:case GE_CMD_LKB3:case GE_CMD_LKC3:
{
int n = cmd - GE_CMD_LKA0;
int l = n / 3;
int c = n % 3;
float val = getFloat24(data);
DEBUG_LOG(G3D,"DL Light %i %c att: %f", l, c+'X', val);
gstate_c.lightatt[l][c] = val;
}
break;
case GE_CMD_LAC0:case GE_CMD_LAC1:case GE_CMD_LAC2:case GE_CMD_LAC3:
case GE_CMD_LDC0:case GE_CMD_LDC1:case GE_CMD_LDC2:case GE_CMD_LDC3:
case GE_CMD_LSC0:case GE_CMD_LSC1:case GE_CMD_LSC2:case GE_CMD_LSC3:
{
float r = (float)(data>>16)/255.0f;
float g = (float)((data>>8) & 0xff)/255.0f;
float b = (float)(data & 0xff)/255.0f;
int l = (cmd - GE_CMD_LAC0) / 3;
int t = (cmd - GE_CMD_LAC0) % 3;
gstate_c.lightColor[t][l][0] = r;
gstate_c.lightColor[t][l][1] = g;
gstate_c.lightColor[t][l][2] = b;
}
break;
case GE_CMD_VIEWPORTX1:
case GE_CMD_VIEWPORTY1:
case GE_CMD_VIEWPORTZ1:
case GE_CMD_VIEWPORTX2:
case GE_CMD_VIEWPORTY2:
case GE_CMD_VIEWPORTZ2:
DEBUG_LOG(G3D,"DL Viewport param %i: %f", cmd-GE_CMD_VIEWPORTX1, getFloat24(data));
break;
case GE_CMD_LIGHTENABLE0:
case GE_CMD_LIGHTENABLE1:
case GE_CMD_LIGHTENABLE2:
case GE_CMD_LIGHTENABLE3:
DEBUG_LOG(G3D,"DL Light %i enable: %d", cmd-GE_CMD_LIGHTENABLE0, data);
break;
case GE_CMD_CULL:
DEBUG_LOG(G3D,"DL cull: %06x", data);
break;
case GE_CMD_LIGHTMODE:
DEBUG_LOG(G3D,"DL Shade mode: %06x", data);
break;
case GE_CMD_PATCHDIVISION:
break;
case GE_CMD_MATERIALUPDATE:
DEBUG_LOG(G3D,"DL Material Update: %d", data);
break;
//////////////////////////////////////////////////////////////////
// CLEARING
//////////////////////////////////////////////////////////////////
case GE_CMD_CLEARMODE:
DEBUG_LOG(G3D,"DL Clear mode: %06x", data);
break;
//////////////////////////////////////////////////////////////////
// ALPHA BLENDING
//////////////////////////////////////////////////////////////////
case GE_CMD_ALPHABLENDENABLE:
DEBUG_LOG(G3D,"DL Alpha blend enable: %d", data);
break;
case GE_CMD_BLENDMODE:
DEBUG_LOG(G3D,"DL Blend mode: %06x", data);
break;
case GE_CMD_BLENDFIXEDA:
DEBUG_LOG(G3D,"DL Blend fix A: %06x", data);
break;
case GE_CMD_BLENDFIXEDB:
DEBUG_LOG(G3D,"DL Blend fix B: %06x", data);
break;
case GE_CMD_ALPHATESTENABLE:
DEBUG_LOG(G3D,"DL Alpha test enable: %d", data);
// This is done in the shader.
break;
case GE_CMD_ALPHATEST:
DEBUG_LOG(G3D,"DL Alpha test settings");
break;
case GE_CMD_TEXFUNC:
DEBUG_LOG(G3D,"DL TexFunc %i", data&7);
break;
case GE_CMD_TEXFILTER:
{
int min = data & 7;
int mag = (data >> 8) & 1;
DEBUG_LOG(G3D,"DL TexFilter min: %i mag: %i", min, mag);
}
break;
//////////////////////////////////////////////////////////////////
// Z/STENCIL TESTING
//////////////////////////////////////////////////////////////////
case GE_CMD_ZTESTENABLE:
DEBUG_LOG(G3D,"DL Z test enable: %d", data & 1);
break;
case GE_CMD_STENCILTESTENABLE:
DEBUG_LOG(G3D,"DL Stencil test enable: %d", data);
break;
case GE_CMD_ZTEST:
DEBUG_LOG(G3D,"DL Z test mode: %i", data);
break;
case GE_CMD_MORPHWEIGHT0:
case GE_CMD_MORPHWEIGHT1:
case GE_CMD_MORPHWEIGHT2:
case GE_CMD_MORPHWEIGHT3:
case GE_CMD_MORPHWEIGHT4:
case GE_CMD_MORPHWEIGHT5:
case GE_CMD_MORPHWEIGHT6:
case GE_CMD_MORPHWEIGHT7:
{
int index = cmd - GE_CMD_MORPHWEIGHT0;
float weight = getFloat24(data);
DEBUG_LOG(G3D,"DL MorphWeight %i = %f", index, weight);
gstate_c.morphWeights[index] = weight;
}
break;
case GE_CMD_DITH0:
case GE_CMD_DITH1:
case GE_CMD_DITH2:
case GE_CMD_DITH3:
DEBUG_LOG(G3D,"DL DitherMatrix %i = %06x",cmd-GE_CMD_DITH0,data);
break;
case GE_CMD_WORLDMATRIXNUMBER:
DEBUG_LOG(G3D,"DL World matrix # %i", data);
gstate.worldmtxnum = data&0xF;
break;
case GE_CMD_WORLDMATRIXDATA:
DEBUG_LOG(G3D,"DL World matrix data # %f", getFloat24(data));
gstate.worldMatrix[gstate.worldmtxnum++] = getFloat24(data);
break;
case GE_CMD_VIEWMATRIXNUMBER:
DEBUG_LOG(G3D,"DL VIEW matrix # %i", data);
gstate.viewmtxnum = data&0xF;
break;
case GE_CMD_VIEWMATRIXDATA:
DEBUG_LOG(G3D,"DL VIEW matrix data # %f", getFloat24(data));
gstate.viewMatrix[gstate.viewmtxnum++] = getFloat24(data);
break;
case GE_CMD_PROJMATRIXNUMBER:
DEBUG_LOG(G3D,"DL PROJECTION matrix # %i", data);
gstate.projmtxnum = data&0xF;
break;
case GE_CMD_PROJMATRIXDATA:
DEBUG_LOG(G3D,"DL PROJECTION matrix data # %f", getFloat24(data));
gstate.projMatrix[gstate.projmtxnum++] = getFloat24(data);
break;
case GE_CMD_TGENMATRIXNUMBER:
DEBUG_LOG(G3D,"DL TGEN matrix # %i", data);
gstate.texmtxnum = data&0xF;
break;
case GE_CMD_TGENMATRIXDATA:
DEBUG_LOG(G3D,"DL TGEN matrix data # %f", getFloat24(data));
gstate.tgenMatrix[gstate.texmtxnum++] = getFloat24(data);
break;
case GE_CMD_BONEMATRIXNUMBER:
DEBUG_LOG(G3D,"DL BONE matrix #%i", data);
gstate.boneMatrixNumber = data;
break;
case GE_CMD_BONEMATRIXDATA:
DEBUG_LOG(G3D,"DL BONE matrix data #%i %f", gstate.boneMatrixNumber, getFloat24(data));
gstate.boneMatrix[gstate.boneMatrixNumber++] = getFloat24(data);
break;
default:
GPUCommon::ExecuteOp(op, diff);
break;
}
}
// Refreshes the global GPU statistics for this backend: every shader and
// texture counter is simply reported as zero.
void SoftGPU::UpdateStats()
{
	gpuStats.numTextures = 0;
	gpuStats.numShaders = 0;
	gpuStats.numFragmentShaders = 0;
	gpuStats.numVertexShaders = 0;
}
// Cache invalidation hook. This backend does nothing here, so all arguments
// are ignored.
void SoftGPU::InvalidateCache(u32 addr, int size, GPUInvalidationType type)
{
	// Nothing to invalidate.
	(void)addr;
	(void)size;
	(void)type;
}
// Memory update notification. Nothing is updated here; the destination range
// is only forwarded to InvalidateCache() as a hint. `src` is not used.
void SoftGPU::UpdateMemory(u32 dest, u32 src, int size)
{
	(void)src;
	this->InvalidateCache(dest, size, GPU_INVALIDATE_HINT);
}

52
GPU/Software/SoftGpu.h Normal file

@ -0,0 +1,52 @@
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#pragma once
#include "../GPUCommon.h"
class ShaderManager;
// Software rendering GPU backend, built on top of GPUCommon.
//
// Several of the virtual hooks below are intentionally empty in this backend
// (InitClear, BeginFrame, SetDisplayFramebuffer, ClearCacheNextFrame,
// DeviceLost, DumpNextFrame, Resized).
class SoftGPU : public GPUCommon
{
public:
	SoftGPU();
	~SoftGPU();

	virtual void InitClear() {}
	// Interprets a single GE command word.
	virtual void ExecuteOp(u32 op, u32 diff);
	virtual u32 DrawSync(int mode);

	virtual void BeginFrame() {}
	virtual void SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) {}
	virtual void CopyDisplayToOutput();
	virtual void UpdateStats();
	virtual void InvalidateCache(u32 addr, int size, GPUInvalidationType type);
	virtual void UpdateMemory(u32 dest, u32 src, int size);
	virtual void ClearCacheNextFrame() {}

	virtual void DeviceLost() {}
	virtual void DumpNextFrame() {}

	virtual void Resized() {}

	// Identifies this backend in reports. It previously reported "NULL",
	// which made it indistinguishable from the Null backend.
	virtual void GetReportingInfo(std::string &primaryInfo, std::string &fullInfo) {
		primaryInfo = "Software";
		fullInfo = "Software";
	}

protected:
	virtual void FastRunLoop(DisplayList &list);
};

@ -0,0 +1,404 @@
// Copyright (c) 2013- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include "../GPUState.h"
#include "../GLES/VertexDecoder.h"
#include "TransformUnit.h"
#include "Clipper.h"
#include "Lighting.h"
// Transforms a model-space position into world space: multiplies by the 3x3
// matrix built from gstate.worldMatrix, then adds elements 9..11 of that array.
WorldCoords TransformUnit::ModelToWorld(const ModelCoords& coords)
{
	Vec3<float> world_offset(gstate.worldMatrix[9], gstate.worldMatrix[10], gstate.worldMatrix[11]);
	Mat3x3<float> world_3x3(gstate.worldMatrix);
	return WorldCoords(world_3x3 * coords) + world_offset;
}
// Transforms a world-space position into view space: multiplies by the 3x3
// matrix built from gstate.viewMatrix, then adds elements 9..11 of that array.
ViewCoords TransformUnit::WorldToView(const WorldCoords& coords)
{
	Vec3<float> view_offset(gstate.viewMatrix[9], gstate.viewMatrix[10], gstate.viewMatrix[11]);
	Mat3x3<float> view_3x3(gstate.viewMatrix);
	return ViewCoords(view_3x3 * coords) + view_offset;
}
// Projects a view-space position into clip space by extending it to a
// homogeneous vector (w = 1) and multiplying by gstate.projMatrix.
ClipCoords TransformUnit::ViewToClip(const ViewCoords& coords)
{
	Mat4x4<float> proj(gstate.projMatrix);
	Vec4<float> homogeneous(coords.x, coords.y, coords.z, 1.0f);
	return ClipCoords(proj * homogeneous);
}
static bool outside_range_flag = false;
// TODO: This is ugly
// Applies the perspective divide and the viewport transform to a clip-space
// position and returns the result as screen coordinates (x/y scaled by 16).
//
// coords   - clip-space position; coords.w is used as the divisor.
// set_flag - when true, outside_range_flag is raised if the transformed
//            vertex falls outside the representable screen range.
static inline ScreenCoords ClipToScreenInternal(const ClipCoords& coords, bool set_flag = true)
{
	// TODO: Check for invalid parameters (x2 < x1, etc)
	float vpx1 = getFloat24(gstate.viewportx1);
	float vpx2 = getFloat24(gstate.viewportx2);
	float vpy1 = getFloat24(gstate.viewporty1);
	float vpy2 = getFloat24(gstate.viewporty2);
	float vpz1 = getFloat24(gstate.viewportz1);
	float vpz2 = getFloat24(gstate.viewportz2);

	float retx = coords.x * vpx1 / coords.w + vpx2;
	float rety = coords.y * vpy1 / coords.w + vpy2;
	float retz = coords.z * vpz1 / coords.w + vpz2;

	// With clipping enabled, depth is clamped into the 16-bit range.
	if (gstate.clipEnable & 0x1) {
		if (retz < 0.f) retz = 0.f;
		if (retz > 65535.f) retz = 65535.f;
	}

	// Largest x/y that still fits the 16-bit fixed-point screen coordinate
	// after the *16 scaling below (0xFFFF / 16). The same limit applies to
	// both axes; the y check previously used 4096.9375, which let values
	// through that overflow the 16-bit coordinate.
	const float max_xy = 4095.9375f;
	if (set_flag && (retx > max_xy || rety > max_xy || retx < 0 || rety < 0 || retz < 0 || retz > 65535.f))
		outside_range_flag = true;

	// 16 = 0xFFFF / 4095.9375
	return ScreenCoords(retx * 16, rety * 16, retz);
}
// Public clip-space -> screen-space conversion. Unlike the internal variant
// used while reading vertices, this never raises outside_range_flag.
ScreenCoords TransformUnit::ClipToScreen(const ClipCoords& coords)
{
	const bool set_flag = false;
	return ClipToScreenInternal(coords, set_flag);
}
// Converts screen coordinates to drawing coordinates: subtracts the low
// 16 bits of the GE x/y offsets, divides by 16 and keeps the low 10 bits.
// Depth is passed through unchanged.
DrawingCoords TransformUnit::ScreenToDrawing(const ScreenCoords& coords)
{
	// TODO: What to do when offset > coord?
	const u32 rel_x = (u32)coords.x - (gstate.offsetx&0xffff);
	const u32 rel_y = (u32)coords.y - (gstate.offsety&0xffff);

	DrawingCoords drawing;
	drawing.z = coords.z;
	drawing.y = (rel_y/16) & 0x3ff;
	drawing.x = (rel_x/16) & 0x3ff;
	return drawing;
}
// Converts drawing coordinates back to screen coordinates: multiplies x/y by
// 16 and adds the low 16 bits of the GE x/y offsets. Depth is passed through.
ScreenCoords TransformUnit::DrawingToScreen(const DrawingCoords& coords)
{
	const u32 scaled_x = (u32)coords.x * 16;
	const u32 scaled_y = (u32)coords.y * 16;

	ScreenCoords screen;
	screen.z = coords.z;
	screen.y = scaled_y + (gstate.offsety&0xffff);
	screen.x = scaled_x + (gstate.offsetx&0xffff);
	return screen;
}
// Reads the vertex the reader is currently positioned at and produces a fully
// populated VertexData: attributes, skinning, transform and lighting.
//
// In non-through mode the position is run through the model/world/view/
// projection/viewport chain and lighting is applied; as a side effect,
// outside_range_flag may be raised by ClipToScreenInternal().
// In through mode the position is used directly as screen coordinates.
static VertexData ReadVertex(VertexReader& vreader)
{
	VertexData vertex;

	float pos[3];
	vreader.ReadPos(pos);

	// Texture coordinates are only fetched when texturing is enabled and we
	// are not in clear mode.
	if (!gstate.isModeClear() && gstate.textureMapEnable && vreader.hasUV()) {
		float uv[2];
		vreader.ReadUV(uv);
		vertex.texturecoords = Vec2<float>(uv[0], uv[1]);
	}

	if (vreader.hasNormal()) {
		float normal[3];
		vreader.ReadNrm(normal);
		vertex.normal = Vec3<float>(normal[0], normal[1], normal[2]);

		if (gstate.reversenormals & 1)
			vertex.normal = -vertex.normal;
	}

	// Skinning: blend position (and normal, if present) over the bone matrices
	// using the per-vertex weights. Each bone matrix occupies 12 floats: a 3x3
	// part followed by a 3-component offset.
	if (gstate.isSkinningEnabled() && !gstate.isModeThrough()) {
		float W[8] = { 1.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f };
		vreader.ReadWeights(W);

		Vec3<float> tmppos(0.f, 0.f, 0.f);
		Vec3<float> tmpnrm(0.f, 0.f, 0.f);

		for (int i = 0; i < gstate.getNumBoneWeights(); ++i) {
			Mat3x3<float> bone(&gstate.boneMatrix[12*i]);
			tmppos += W[i] * (bone * ModelCoords(pos[0], pos[1], pos[2]) + Vec3<float>(gstate.boneMatrix[12*i+9], gstate.boneMatrix[12*i+10], gstate.boneMatrix[12*i+11]));
			if (vreader.hasNormal())
				tmpnrm += W[i] * (bone * vertex.normal);
		}

		pos[0] = tmppos.x;
		pos[1] = tmppos.y;
		pos[2] = tmppos.z;
		if (vreader.hasNormal())
			vertex.normal = tmpnrm;
	}

	// Primary color: taken from the vertex if present, otherwise from the
	// material diffuse color and material alpha.
	if (vreader.hasColor0()) {
		float col[4];
		vreader.ReadColor0(col);
		vertex.color0 = Vec4<int>(col[0]*255, col[1]*255, col[2]*255, col[3]*255);
	} else {
		vertex.color0 = Vec4<int>(gstate.materialdiffuse&0xFF, (gstate.materialdiffuse>>8)&0xFF, (gstate.materialdiffuse>>16)&0xFF, gstate.materialalpha&0xFF);
	}

	// Secondary color. This must use ReadColor1: the previous code called
	// ReadColor0 here, which fetched the wrong attribute and wrote four
	// components into this three-element buffer.
	if (vreader.hasColor1()) {
		float col[3];
		vreader.ReadColor1(col);
		vertex.color1 = Vec3<int>(col[0]*255, col[1]*255, col[2]*255);
	} else {
		vertex.color1 = Vec3<int>(0, 0, 0);
	}

	if (!gstate.isModeThrough()) {
		vertex.modelpos = ModelCoords(pos[0], pos[1], pos[2]);
		vertex.worldpos = WorldCoords(TransformUnit::ModelToWorld(vertex.modelpos));
		vertex.clippos = ClipCoords(TransformUnit::ViewToClip(TransformUnit::WorldToView(vertex.worldpos)));
		vertex.screenpos = ClipToScreenInternal(vertex.clippos);

		if (vreader.hasNormal()) {
			// ModelToWorld() also adds the world translation; subtract it again
			// since normals are directions, not positions.
			vertex.worldnormal = TransformUnit::ModelToWorld(vertex.normal) - Vec3<float>(gstate.worldMatrix[9], gstate.worldMatrix[10], gstate.worldMatrix[11]);
			vertex.worldnormal /= vertex.worldnormal.Length(); // TODO: Shouldn't be necessary..
		}

		Lighting::Process(vertex);
	} else {
		// Through mode: positions are already in drawing space; only the
		// 16x scaling and the screen offset are applied.
		vertex.screenpos.x = (u32)pos[0] * 16 + (gstate.offsetx&0xffff);
		vertex.screenpos.y = (u32)pos[1] * 16 + (gstate.offsety&0xffff);
		vertex.screenpos.z = pos[2];
		vertex.clippos.w = 1.f;
	}

	return vertex;
}
// Edge flags stored in SplinePatch::type. SubmitSpline() builds the value as
// (type_u | (type_v << 2)), so bits 0-1 describe the U direction and bits 2-3
// the V direction. A set flag makes SubmitSpline() also emit the outermost
// row/column of tiles on that side of the patch.
#define START_OPEN_U 1
#define END_OPEN_U 2
#define START_OPEN_V 4
#define END_OPEN_V 8
// One 4x4 window of control points taken from the spline's control grid.
struct SplinePatch {
// Control points, stored row by row (index = u + v*4).
VertexData points[16];
// Combination of the START_OPEN_* / END_OPEN_* flags above.
int type;
};
// Draws a spline surface given as a count_u x count_v grid of control points.
//
// control_points - raw vertex data in the format described by vertex_type.
// indices        - optional 8- or 16-bit index list (NULL for non-indexed).
// count_u/v      - control point grid dimensions.
// type_u/v       - edge flags per direction (see START_OPEN_* / END_OPEN_*).
// prim_type      - currently unused.
// vertex_type    - GE vertex type word used to decode the control points.
//
// The grid is split into overlapping 4x4 patches; for now the control points
// themselves are drawn as triangles instead of a subdivided surface.
void TransformUnit::SubmitSpline(void* control_points, void* indices, int count_u, int count_v, int type_u, int type_v, u32 prim_type, u32 vertex_type)
{
	// A patch needs a 4x4 window of control points. With fewer than four
	// points in either direction there is nothing to draw. Bail out early so
	// we never allocate with a negative size, and never draw uninitialized
	// patches when both counts are negative (their product is positive).
	const int num_patches_u = count_u - 3;
	const int num_patches_v = count_v - 3;
	if (num_patches_u <= 0 || num_patches_v <= 0)
		return;

	VertexDecoder vdecoder;
	vdecoder.SetVertexType(vertex_type);
	const DecVtxFormat& vtxfmt = vdecoder.GetDecVtxFmt();

	static u8 buf[65536 * 48]; // yolo
	u16 index_lower_bound = 0;
	u16 index_upper_bound = count_u * count_v - 1;
	bool indices_16bit = (vertex_type & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT;
	u8* indices8 = (u8*)indices;
	u16* indices16 = (u16*)indices;
	if (indices)
		GetIndexBounds(indices, count_u*count_v, vertex_type, &index_lower_bound, &index_upper_bound);
	vdecoder.DecodeVerts(buf, control_points, index_lower_bound, index_upper_bound);

	VertexReader vreader(buf, vtxfmt, vertex_type);

	// TODO: Do something less idiotic to manage this buffer
	SplinePatch* patches = new SplinePatch[num_patches_u * num_patches_v];

	// Gather the 16 control points of every patch and work out which of its
	// edges lie on the border of the whole surface.
	for (int patch_u = 0; patch_u < num_patches_u; ++patch_u) {
		for (int patch_v = 0; patch_v < num_patches_v; ++patch_v) {
			SplinePatch& patch = patches[patch_u + patch_v * num_patches_u];

			for (int point = 0; point < 16; ++point) {
				int idx = (patch_u + point%4) + (patch_v + point/4) * count_u;
				if (indices)
					vreader.Goto(indices_16bit ? indices16[idx] : indices8[idx]);
				else
					vreader.Goto(idx);

				patch.points[point] = ReadVertex(vreader);
			}

			// Only patches on the surface border keep their "open" flags.
			patch.type = (type_u | (type_v<<2));
			if (patch_u != 0) patch.type &= ~START_OPEN_U;
			if (patch_v != 0) patch.type &= ~START_OPEN_V;
			if (patch_u != num_patches_u-1) patch.type &= ~END_OPEN_U;
			if (patch_v != num_patches_v-1) patch.type &= ~END_OPEN_V;
		}
	}

	for (int patch_idx = 0; patch_idx < num_patches_u*num_patches_v; ++patch_idx) {
		SplinePatch& patch = patches[patch_idx];

		// TODO: Should do actual patch subdivision instead of just drawing the control points!
		const int tile_min_u = (patch.type & START_OPEN_U) ? 0 : 1;
		const int tile_min_v = (patch.type & START_OPEN_V) ? 0 : 1;
		const int tile_max_u = (patch.type & END_OPEN_U) ? 3 : 2;
		const int tile_max_v = (patch.type & END_OPEN_V) ? 3 : 2;
		for (int tile_u = tile_min_u; tile_u < tile_max_u; ++tile_u) {
			for (int tile_v = tile_min_v; tile_v < tile_max_v; ++tile_v) {
				int point_index = tile_u + tile_v*4;

				VertexData v0 = patch.points[point_index];
				VertexData v1 = patch.points[point_index+1];
				VertexData v2 = patch.points[point_index+4];
				VertexData v3 = patch.points[point_index+5];

				// TODO: Backface culling etc
				// Each triangle is submitted in both windings.
				Clipper::ProcessTriangle(v0, v1, v2);
				Clipper::ProcessTriangle(v2, v1, v0);
				Clipper::ProcessTriangle(v2, v1, v3);
				Clipper::ProcessTriangle(v3, v1, v2);
			}
		}
	}
	delete[] patches;

	// ReadVertex() may have raised the file-global range flag. Splines ignore
	// it, but it must not leak into whatever is drawn next.
	outside_range_flag = false;
}
void TransformUnit::SubmitPrimitive(void* vertices, void* indices, u32 prim_type, int vertex_count, u32 vertex_type)
{
// TODO: Cache VertexDecoder objects
VertexDecoder vdecoder;
vdecoder.SetVertexType(vertex_type);
const DecVtxFormat& vtxfmt = vdecoder.GetDecVtxFmt();
static u8 buf[65536 * 48]; // yolo
u16 index_lower_bound = 0;
u16 index_upper_bound = vertex_count - 1;
bool indices_16bit = (vertex_type & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT;
u8* indices8 = (u8*)indices;
u16* indices16 = (u16*)indices;
if (indices)
GetIndexBounds(indices, vertex_count, vertex_type, &index_lower_bound, &index_upper_bound);
vdecoder.DecodeVerts(buf, vertices, index_lower_bound, index_upper_bound);
VertexReader vreader(buf, vtxfmt, vertex_type);
const int max_vtcs_per_prim = 3;
int vtcs_per_prim = 0;
if (prim_type == GE_PRIM_POINTS) vtcs_per_prim = 1;
else if (prim_type == GE_PRIM_LINES) vtcs_per_prim = 2;
else if (prim_type == GE_PRIM_TRIANGLES) vtcs_per_prim = 3;
else if (prim_type == GE_PRIM_RECTANGLES) vtcs_per_prim = 2;
else {
// TODO: Unsupported
}
if (prim_type == GE_PRIM_POINTS || prim_type == GE_PRIM_LINES || prim_type == GE_PRIM_TRIANGLES || prim_type == GE_PRIM_RECTANGLES) {
for (int vtx = 0; vtx < vertex_count; vtx += vtcs_per_prim) {
VertexData data[max_vtcs_per_prim];
for (int i = 0; i < vtcs_per_prim; ++i) {
if (indices)
vreader.Goto(indices_16bit ? indices16[vtx+i] : indices8[vtx+i]);
else
vreader.Goto(vtx+i);
data[i] = ReadVertex(vreader);
if (outside_range_flag)
break;
}
if (outside_range_flag) {
outside_range_flag = false;
continue;
}
switch (prim_type) {
case GE_PRIM_TRIANGLES:
{
if (!gstate.isCullEnabled() || gstate.isModeClear()) {
Clipper::ProcessTriangle(data[0], data[1], data[2]);
Clipper::ProcessTriangle(data[2], data[1], data[0]);
} else if (!gstate.getCullMode())
Clipper::ProcessTriangle(data[2], data[1], data[0]);
else
Clipper::ProcessTriangle(data[0], data[1], data[2]);
break;
}
case GE_PRIM_RECTANGLES:
Clipper::ProcessQuad(data[0], data[1]);
break;
}
}
} else if (prim_type == GE_PRIM_TRIANGLE_STRIP) {
VertexData data[3];
unsigned int skip_count = 2; // Don't draw a triangle when loading the first two vertices
for (int vtx = 0; vtx < vertex_count; ++vtx) {
if (indices)
vreader.Goto(indices_16bit ? indices16[vtx] : indices8[vtx]);
else
vreader.Goto(vtx);
data[vtx % 3] = ReadVertex(vreader);
if (outside_range_flag) {
// Drop all primitives containing the current vertex
skip_count = 2;
outside_range_flag = false;
continue;
}
if (skip_count) {
--skip_count;
continue;
}
if (!gstate.isCullEnabled() || gstate.isModeClear()) {
Clipper::ProcessTriangle(data[0], data[1], data[2]);
Clipper::ProcessTriangle(data[2], data[1], data[0]);
} else if ((!gstate.getCullMode()) ^ (vtx % 2)) {
// We need to reverse the vertex order for each second primitive,
// but we additionally need to do that for every primitive if CCW cullmode is used.
Clipper::ProcessTriangle(data[2], data[1], data[0]);
} else {
Clipper::ProcessTriangle(data[0], data[1], data[2]);
}
}
} else if (prim_type == GE_PRIM_TRIANGLE_FAN) {
VertexData data[3];
unsigned int skip_count = 1; // Don't draw a triangle when loading the first two vertices
if (indices)
vreader.Goto(indices_16bit ? indices16[0] : indices8[0]);
else
vreader.Goto(0);
data[0] = ReadVertex(vreader);
for (int vtx = 1; vtx < vertex_count; ++vtx) {
if (indices)
vreader.Goto(indices_16bit ? indices16[vtx] : indices8[vtx]);
else
vreader.Goto(vtx);
data[2 - (vtx % 2)] = ReadVertex(vreader);
if (outside_range_flag) {
// Drop all primitives containing the current vertex
skip_count = 2;
outside_range_flag = false;
continue;
}
if (skip_count) {
--skip_count;
continue;
}
if (!gstate.isCullEnabled() || gstate.isModeClear()) {
Clipper::ProcessTriangle(data[0], data[1], data[2]);
Clipper::ProcessTriangle(data[2], data[1], data[0]);
} else if ((!gstate.getCullMode()) ^ (vtx % 2)) {
// We need to reverse the vertex order for each second primitive,
// but we additionally need to do that for every primitive if CCW cullmode is used.
Clipper::ProcessTriangle(data[2], data[1], data[0]);
} else {
Clipper::ProcessTriangle(data[0], data[1], data[2]);
}
}
}
}

@ -0,0 +1,120 @@
// Copyright (c) 2013- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#pragma once
#include "CommonTypes.h"
#include "../Math3D.h"
typedef u16 fixed16;
typedef u16 u10; // TODO: erm... :/
typedef Vec3<float> ModelCoords;
typedef Vec3<float> WorldCoords;
typedef Vec3<float> ViewCoords;
typedef Vec4<float> ClipCoords; // Range: -w <= x/y/z <= w
struct ScreenCoords
{
ScreenCoords() {}
ScreenCoords(fixed16 x, fixed16 y, u16 z) : x(x), y(y), z(z) {}
fixed16 x;
fixed16 y;
u16 z;
Vec2<fixed16> xy() const { return Vec2<fixed16>(x, y); }
ScreenCoords operator * (const float t) const
{
return ScreenCoords(x * t, y * t, z * t);
}
ScreenCoords operator / (const int t) const
{
return ScreenCoords(x / t, y / t, z / t);
}
ScreenCoords operator + (const ScreenCoords& oth) const
{
return ScreenCoords(x + oth.x, y + oth.y, z + oth.z);
}
};
struct DrawingCoords
{
DrawingCoords() {}
DrawingCoords(u10 x, u10 y, u16 z) : x(x), y(y), z(z) {}
u10 x;
u10 y;
u16 z;
Vec2<u10> xy() const { return Vec2<u10>(x, y); }
DrawingCoords operator * (const float t) const
{
return DrawingCoords(x * t, y * t, z * t);
}
DrawingCoords operator + (const DrawingCoords& oth) const
{
return DrawingCoords(x + oth.x, y + oth.y, z + oth.z);
}
};
struct VertexData
{
void Lerp(float t, const VertexData& a, const VertexData& b)
{
// World coords only needed for lighting, so we don't Lerp those
modelpos = ::Lerp(a.modelpos, b.modelpos, t);
clippos = ::Lerp(a.clippos, b.clippos, t);
screenpos = ::Lerp(a.screenpos, b.screenpos, t); // TODO: Should use a LerpInt (?)
texturecoords = ::Lerp(a.texturecoords, b.texturecoords, t);
normal = ::Lerp(a.normal, b.normal, t);
u16 t_int =(u16)(t*256);
color0 = LerpInt<Vec4<int>,256>(a.color0, b.color0, t_int);
color1 = LerpInt<Vec3<int>,256>(a.color1, b.color1, t_int);
}
ModelCoords modelpos;
WorldCoords worldpos; // TODO: Storing this is dumb, should transform the light to clip space instead
ClipCoords clippos;
ScreenCoords screenpos; // TODO: Shouldn't store this ?
Vec2<float> texturecoords;
Vec3<float> normal;
WorldCoords worldnormal;
Vec4<int> color0;
Vec3<int> color1;
};
// Collection of static helpers implementing the vertex transform stage of the
// software renderer: coordinate space conversions plus the entry points that
// decode vertices and hand primitives to the clipper.
class TransformUnit
{
public:
// Model space -> world space, using gstate.worldMatrix.
static WorldCoords ModelToWorld(const ModelCoords& coords);
// World space -> view space, using gstate.viewMatrix.
static ViewCoords WorldToView(const WorldCoords& coords);
// View space -> clip space, using gstate.projMatrix.
static ClipCoords ViewToClip(const ViewCoords& coords);
// Clip space -> screen space (perspective divide + viewport transform).
static ScreenCoords ClipToScreen(const ClipCoords& coords);
// Screen space -> drawing space: removes the GE x/y offset and the 16x scaling.
static DrawingCoords ScreenToDrawing(const ScreenCoords& coords);
// Drawing space -> screen space: inverse of ScreenToDrawing.
static ScreenCoords DrawingToScreen(const DrawingCoords& coords);
// Draws a spline surface from a count_u x count_v grid of control points.
static void SubmitSpline(void* control_points, void* indices, int count_u, int count_v, int type_u, int type_v, u32 prim_type, u32 vertex_type);
// Decodes and draws vertex_count vertices as primitives of type prim_type.
static void SubmitPrimitive(void* vertices, void* indices, u32 prim_type, int vertex_count, u32 vertex_type);
};

@ -330,13 +330,19 @@ enum GEMatrixType {
enum GEComparison
{
GE_COMP_NEVER=0,
GE_COMP_ALWAYS,
GE_COMP_EQUAL,
GE_COMP_NOTEQUAL,
GE_COMP_LESS,
GE_COMP_LEQUAL,
GE_COMP_GREATER,
GE_COMP_GEQUAL
GE_COMP_ALWAYS=1,
GE_COMP_EQUAL=2,
GE_COMP_NOTEQUAL=3,
GE_COMP_LESS=4,
GE_COMP_LEQUAL=5,
GE_COMP_GREATER=6,
GE_COMP_GEQUAL=7
};
// Shading modes: flat (0) or Gouraud (1).
// NOTE(review): presumably mirrors the GE shade-mode register encoding — confirm.
enum GEShadeMode
{
GE_SHADE_FLAT=0,
GE_SHADE_GOURAUD
};
enum GELightType
@ -434,11 +440,11 @@ enum GETexFunc
enum GEStencilOp
{
GE_STENCILOP_KEEP=0,
GE_STENCILOP_ZERO=0,
GE_STENCILOP_REPLACE=0,
GE_STENCILOP_INVERT=0,
GE_STENCILOP_INCR=0,
GE_STENCILOP_DECR=0,
GE_STENCILOP_ZERO=1,
GE_STENCILOP_REPLACE=2,
GE_STENCILOP_INVERT=3,
GE_STENCILOP_INCR=4,
GE_STENCILOP_DECR=5,
};
@ -451,6 +457,21 @@ enum GEStencilOp
#define GE_TFILT_NEAREST_MIPMAP_LINEAR 6
#define GE_TFILT_LINEAR_MIPMAP_LINEAR 7
// Texture mapping modes.
// NOTE(review): presumably selects where texture coordinates come from
// (vertex UVs, texture matrix, or environment mapping) — confirm against users.
enum GETexMapMode
{
GE_TEXMAP_TEXTURE_COORDS=0,
GE_TEXMAP_TEXTURE_MATRIX=1,
GE_TEXMAP_ENVIRONMENT_MAP=2,
};
// Texture projection mapping modes.
// NOTE(review): presumably selects which vertex attribute is fed through the
// texture matrix — confirm against users.
enum GETexProjMapMode
{
GE_PROJMAP_POSITION=0,
GE_PROJMAP_UV=1,
GE_PROJMAP_NORMALIZED_NORMAL=2,
GE_PROJMAP_NORMAL=3
};
enum GEPrimitiveType
{
GE_PRIM_POINTS=0,
@ -482,6 +503,13 @@ enum GELogicOp
GE_LOGIC_SET=15
};
// Primitive type used when drawing patches (triangles, lines or points).
enum GEPatchPrimType
{
GE_PATCHPRIM_TRIANGLES=0,
GE_PATCHPRIM_LINES=1,
GE_PATCHPRIM_POINTS=2,
};
enum GEPaletteFormat
{
GE_CMODE_16BIT_BGR5650,

@ -49,6 +49,7 @@ SOURCES += ../Core/*.cpp \ # Core
../GPU/Math3D.cpp \
../GPU/Null/NullGpu.cpp \
../GPU/GLES/*.cpp \
../GPU/Software/*.cpp \
../ext/libkirk/*.c \ # Kirk
../ext/xxhash.c \ # xxHash
../ext/xbrz/*.cpp # XBRZ

@ -64,7 +64,7 @@ void EmuScreen::bootGame(const std::string &filename) {
CoreParameter coreParam;
coreParam.cpuCore = g_Config.bJit ? CPU_JIT : CPU_INTERPRETER;
coreParam.gpuCore = GPU_GLES;
coreParam.gpuCore = g_Config.bSoftwareRendering ? GPU_SOFTWARE : GPU_GLES;
coreParam.enableSound = g_Config.bEnableSound;
coreParam.fileToStart = fileToStart;
coreParam.mountIso = "";

@ -445,6 +445,7 @@ void PauseScreen::render() {
#endif
UICheckBox(GEN_ID, x, y += stride, gs->T("Stretch to Display"), ALIGN_TOPLEFT, &g_Config.bStretchToDisplay);
UICheckBox(GEN_ID, x, y += stride, gs->T("Software Rendering"), ALIGN_TOPLEFT, &g_Config.bSoftwareRendering);
UICheckBox(GEN_ID, x, y += stride, gs->T("Hardware Transform"), ALIGN_TOPLEFT, &g_Config.bHardwareTransform);
bool enableFrameSkip = g_Config.iFrameSkip != 0;
UICheckBox(GEN_ID, x, y += stride , gs->T("Frame Skipping"), ALIGN_TOPLEFT, &enableFrameSkip);
@ -939,6 +940,7 @@ void GraphicsScreenP1::render() {
int stride = 40;
int columnw = 400;
UICheckBox(GEN_ID, x, y += stride, gs->T("Software Rendering"), ALIGN_TOPLEFT, &g_Config.bSoftwareRendering);
#ifndef __SYMBIAN32__
UICheckBox(GEN_ID, x, y += stride, gs->T("Hardware Transform"), ALIGN_TOPLEFT, &g_Config.bHardwareTransform);
#endif

@ -183,6 +183,11 @@ LOCAL_SRC_FILES := \
$(SRC)/GPU/GLES/FragmentShaderGenerator.cpp \
$(SRC)/GPU/GLES/TextureScaler.cpp \
$(SRC)/GPU/Null/NullGpu.cpp \
$(SRC)/GPU/Software/Clipper.cpp \
$(SRC)/GPU/Software/Lighting.cpp \
$(SRC)/GPU/Software/Rasterizer.cpp \
$(SRC)/GPU/Software/SoftGpu.cpp \
$(SRC)/GPU/Software/TransformUnit.cpp \
$(SRC)/Core/ELF/ElfReader.cpp \
$(SRC)/Core/ELF/PBPReader.cpp \
$(SRC)/Core/ELF/PrxDecrypter.cpp \

@ -57,7 +57,7 @@ void RunTests()
CoreParameter coreParam;
coreParam.cpuCore = g_Config.bJit ? CPU_JIT : CPU_INTERPRETER;
coreParam.gpuCore = GPU_GLES;
coreParam.gpuCore = g_Config.bSoftwareRendering ? GPU_SOFTWARE : GPU_GLES;
coreParam.enableSound = g_Config.bEnableSound;
coreParam.mountIso = "";
coreParam.startPaused = false;