Directx9 Gpu

This commit is contained in:
Ced2911 2013-08-17 11:23:51 +02:00
parent 0b2cd9ccf4
commit 3188c00629
30 changed files with 11528 additions and 0 deletions

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,90 @@
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#pragma once
#include <list>
#include <deque>
#include "../GPUCommon.h"
#include "Framebuffer.h"
#include "VertexDecoder.h"
#include "TransformPipeline.h"
#include "TextureCache.h"
#include "helper/fbo.h"
class ShaderManager;
class LinkedShader;
// GPU backend class for the DirectX 9 renderer, derived from GPUCommon.
// Only DecodeTexture() and GetReportingInfo() are defined inline here; every
// other member function is declared only and defined outside this header.
class DIRECTX9_GPU : public GPUCommon
{
public:
DIRECTX9_GPU();
~DIRECTX9_GPU();
virtual void InitClear();
// Per-command hooks; `op` is the raw command word and `diff` is passed through
// from the caller (presumably the changed bits vs. the previous value - TODO confirm).
virtual void PreExecuteOp(u32 op, u32 diff);
virtual void ExecuteOp(u32 op, u32 diff);
virtual u32 DrawSync(int mode);
virtual void SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format);
virtual void CopyDisplayToOutput();
virtual void BeginFrame();
virtual void UpdateStats();
virtual void InvalidateCache(u32 addr, int size, GPUInvalidationType type);
virtual void UpdateMemory(u32 dest, u32 src, int size);
virtual void ClearCacheNextFrame();
// NOTE(review): the Android remark below looks inherited from another backend - confirm it applies here.
virtual void DeviceLost(); // Only happens on Android. Drop all textures and shaders.
virtual void DumpNextFrame();
virtual void Flush();
virtual void DoState(PointerWrap &p);
// Called by the window system if the window size changed. This will be reflected in PSPCoreParam.pixel*.
virtual void Resized();
// Forwards straight to the texture cache and returns its result.
virtual bool DecodeTexture(u8* dest, GPUgstate state)
{
return textureCache_.DecodeTexture(dest, state);
}
virtual bool FramebufferDirty();
// Copies the two stored reporting strings into the caller's output parameters.
virtual void GetReportingInfo(std::string &primaryInfo, std::string &fullInfo) {
primaryInfo = reportingPrimaryInfo_;
fullInfo = reportingFullInfo_;
}
std::vector<FramebufferInfo> GetFramebufferList();
protected:
virtual void FastRunLoop(DisplayList &list);
private:
void DoBlockTransfer();
void ApplyDrawState(int prim);
void CheckFlushOp(u32 op, u32 diff);
void BuildReportingInfo();
// Sub-systems held by value. Members are constructed in declaration order,
// so do not reorder them without checking the constructors.
FramebufferManager framebufferManager_;
TextureCache textureCache_;
TransformDrawEngine transformDraw_;
// Held by pointer; ownership is not visible from this header.
ShaderManager *shaderManager_;
u8 *flushBeforeCommand_;
bool resized_;
int lastVsync_;
// Strings handed out by GetReportingInfo().
std::string reportingPrimaryInfo_;
std::string reportingFullInfo_;
};

View File

@ -0,0 +1,315 @@
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include "FragmentShaderGenerator.h"
#include "../ge_constants.h"
#include "../GPUState.h"
#include <cstdio>
#define WRITE p+=sprintf
// #define DEBUG_SHADER
// GL_NV_shader_framebuffer_fetch looks interesting....
// Returns true when the currently configured alpha test can never reject a
// fragment, so the shader generator may leave the test out entirely.
//
// The reference value is the 8-bit field at bits 8..15 of gstate.alphatest and
// the function is its low 3 bits. Only these combinations are trivially true:
//   ALWAYS               - passes unconditionally
//   GEQUAL with ref 0    - every alpha is >= 0
//   LEQUAL with ref 255  - every alpha is <= 255
//
// Every case returns explicitly. The switch used to fall through from GEQUAL
// into LEQUAL, so "alpha >= 255" was reported as trivially true and the test
// was dropped for fragments that must be rejected.
static bool IsAlphaTestTriviallyTrue() {
	const int alphaTestFunc = gstate.alphatest & 7;
	const int alphaTestRef = (gstate.alphatest >> 8) & 0xFF;

	switch (alphaTestFunc) {
	case GE_COMP_ALWAYS:
		return true;

	case GE_COMP_GEQUAL:
		return alphaTestRef == 0;

	// This breaks the trees in MotoGP, for example.
	// case GE_COMP_GREATER:
	//if (alphaTestRef == 0 && (gstate.alphaBlendEnable & 1) && gstate.getBlendFuncA() == GE_SRCBLEND_SRCALPHA && gstate.getBlendFuncB() == GE_SRCBLEND_INVSRCALPHA)
	//	return true;

	case GE_COMP_LEQUAL:
		return alphaTestRef == 255;

	default:
		return false;
	}
}
// Returns true when the color test function (low 2 bits of gstate.colortest)
// is ALWAYS, i.e. the test cannot reject anything.
static bool IsColorTestTriviallyTrue() {
	const int func = gstate.colortest & 3;
	return func == GE_COMP_ALWAYS;
}
// Decides whether the "double source alpha" blend factor can be emulated by
// doubling alpha in the fragment shader.
//
// That only works when blending is enabled, exactly one usable side requests
// GE_SRCBLEND_DOUBLESRCALPHA, and the opposite side does not itself read the
// (undoubled) source alpha.
// LittleBigPlanet, for example, uses 2.0 * src, 1.0 - src, which can't double.
static bool CanDoubleSrcBlendMode() {
	if (!gstate.isAlphaBlendEnabled())
		return false;

	const int factorA = gstate.getBlendFuncA();
	const int factorB = gstate.getBlendFuncB();

	// Find the side that is NOT the doubled one; bail out if neither is doubled.
	int otherFactor;
	if (factorA == GE_SRCBLEND_DOUBLESRCALPHA)
		otherFactor = factorB;
	else if (factorB == GE_SRCBLEND_DOUBLESRCALPHA)
		otherFactor = factorA;
	else
		return false;

	const bool otherReadsSrcAlpha =
		otherFactor == GE_DSTBLEND_SRCALPHA || otherFactor == GE_DSTBLEND_INVSRCALPHA;
	return !otherReadsSrcAlpha;
}
// Here we must take all the bits of the gstate that determine what the fragment shader will
// look like, and concatenate them together into an ID.
// Packs every piece of GE state that influences the generated fragment shader
// into id->d[0].
//
// Bit layout of d[0] outside clear mode:
//   1      texture mapping enabled
//   2-4    texture function        (only when texturing)
//   5      texture alpha is used   (only when texturing)
//   7      separate secondary color (lmode with lighting enabled)
//   8      alpha test enabled
//   9-11   alpha test function     (only when the test is not trivially true)
//   12     color test enabled
//   13-14  color test function     (only when the test is not trivially true)
//   15     fog
//   16     projected texture coordinates
//   17     color doubling
//   18     alpha doubling
// In clear mode the whole ID is simply 1.
void ComputeFragmentShaderID(FragmentShaderID *id) {
	memset(&id->d[0], 0, sizeof(id->d));

	if (gstate.clearmode & 1) {
		// We only need one clear shader, so let's ignore the rest of the bits.
		id->d[0] = 1;
		return;
	}

	const int separateSpecular = (gstate.lmode & 1) && gstate.isLightingEnabled();
	const bool fog = gstate.isFogEnabled() && !gstate.isModeThrough();
	const bool alphaTest = gstate.isAlphaTestEnabled() && !IsAlphaTestTriviallyTrue();
	const bool colorTest = gstate.isColorTestEnabled() && !IsColorTestTriviallyTrue();
	const bool colorDoubling = (gstate.texfunc & 0x10000) != 0;
	// This isn't really correct, but it's a hack to get doubled blend modes to work more correctly.
	const bool alphaDoubling = CanDoubleSrcBlendMode();
	const bool projectedTexture = gstate.getUVGenMode() == 1;

	// All texfuncs except replace are the same for RGB as for RGBA with full alpha.
	bool textureAlpha = (gstate.texfunc & 0x100) != 0;
	if (gstate_c.textureFullAlpha && (gstate.texfunc & 0x7) != GE_TEXFUNC_REPLACE)
		textureAlpha = false;

	u32 bits = 0;

	if (gstate.isTextureMapEnabled()) {
		bits |= 1 << 1;
		bits |= (gstate.texfunc & 0x7) << 2;
		bits |= (textureAlpha & 1) << 5;  // rgb or rgba
	}

	bits |= (separateSpecular & 1) << 7;

	bits |= gstate.isAlphaTestEnabled() << 8;
	if (alphaTest)
		bits |= (gstate.alphatest & 0x7) << 9;  // alpha test func

	bits |= gstate.isColorTestEnabled() << 12;
	if (colorTest)
		bits |= (gstate.colortest & 0x3) << 13;  // color test func

	bits |= (fog & 1) << 15;
	bits |= (projectedTexture & 1) << 16;
	bits |= (colorDoubling & 1) << 17;
	bits |= (alphaDoubling & 1) << 18;

	id->d[0] = bits;
}
// Missing: Z depth range
// Also, logic ops etc, of course. Urgh.
#if 0
// Fixed-shader variant, compiled out by the surrounding `#if 0`.
// It ignores the GE state and copies one constant HLSL pixel shader into
// `buffer`: the shader samples sampler s0 at the interpolated UV and returns
// that texel unmodified. `buffer` must be large enough for the whole string;
// strcpy performs no bounds check.
void GenerateFragmentShader(char *buffer) {
//--------------------------------------------------------------------------------------
// Pixel shader
//--------------------------------------------------------------------------------------
const char * pscode =
" sampler s: register(s0); "
" struct PS_IN "
" { "
" float3 Uv : TEXCOORD0; "
" float4 C1 : COLOR0; " // Vertex color
" float4 C2 : COLOR1; " // Vertex color
" }; "
" "
" float4 main( PS_IN In ) : COLOR "
" { "
//" float4 c = In.C1; "
" float4 c = tex2D(s, In.Uv.xy); "
" return c; "
" } ";
strcpy(buffer, pscode);
}
#else
// Writes the HLSL source of the pixel shader matching the current GE state
// into `buffer` as a NUL-terminated string.
//
// `buffer` is filled with unbounded sprintf calls (the WRITE macro), so the
// caller must provide a buffer large enough for the longest shader.
//
// Shader structure:
//   - optional sampler / uniforms (alpha+color reference, texenv, fog color)
//   - PS_IN input struct (texcoord, primary and secondary color, optional fog)
//   - main(): clear mode returns the vertex color untouched; otherwise the
//     texture function is applied, followed by alpha test, color/alpha
//     doubling, color test and fog.
void GenerateFragmentShader(char *buffer) {
	char *p = buffer;

	int lmode = (gstate.lmode & 1) && gstate.isLightingEnabled();
	int doTexture = gstate.isTextureMapEnabled() && !gstate.isModeClear();
	bool enableFog = gstate.isFogEnabled() && !gstate.isModeThrough() && !gstate.isModeClear();
	bool enableAlphaTest = gstate.isAlphaTestEnabled() && !IsAlphaTestTriviallyTrue() && !gstate.isModeClear();
	bool enableColorTest = gstate.isColorTestEnabled() && !IsColorTestTriviallyTrue() && !gstate.isModeClear();
	bool enableColorDoubling = (gstate.texfunc & 0x10000) != 0;
	// This isn't really correct, but it's a hack to get doubled blend modes to work more correctly.
	bool enableAlphaDoubling = CanDoubleSrcBlendMode();
	bool doTextureProjection = gstate.getUVGenMode() == 1;
	bool doTextureAlpha = (gstate.texfunc & 0x100) != 0;

	// All texfuncs except replace are the same for RGB as for RGBA with full alpha.
	if (gstate_c.textureFullAlpha && (gstate.texfunc & 0x7) != GE_TEXFUNC_REPLACE)
		doTextureAlpha = false;

	// ---- Globals: sampler, uniforms and helper functions ----
	if (doTexture)
		WRITE(p, "sampler tex: register(s0);\n");

	if (enableAlphaTest || enableColorTest) {
		WRITE(p, "float4 u_alphacolorref;\n");
		WRITE(p, "float3 u_colormask;\n");
	}
	if (gstate.isTextureMapEnabled())
		WRITE(p, "float3 u_texenv;\n");
	if (enableFog) {
		WRITE(p, "float3 u_fogcolor;\n");
	}

	if (enableAlphaTest) {
		WRITE(p, "float roundAndScaleTo255f(float x) { return floor(x * 255.0f + 0.5f); }\n");
	}
	if (enableColorTest) {
		WRITE(p, "float3 roundAndScaleTo255v(float3 x) { return floor(x * 255.0f + 0.5f); }\n");
	}

	// ---- Input struct ----
	WRITE(p, " struct PS_IN ");
	WRITE(p, " { ");
	WRITE(p, " float4 v_texcoord: TEXCOORD0; ");
	WRITE(p, " float4 v_color0: COLOR0; ");
	WRITE(p, " float4 v_color1: COLOR1; ");
	if (enableFog) {
		WRITE(p, "float v_fogdepth:FOG;\n");
	}
	WRITE(p, " }; ");
	WRITE(p, " ");

	// ---- Entry point ----
	WRITE(p, " float4 main( PS_IN In ) : COLOR ");
	WRITE(p, " { ");

	if (gstate.isModeClear()) {
		// Clear mode does not allow any fancy shading.
		WRITE(p, " return In.v_color0;\n");
	} else {
		// Secondary color for specular on top of texture
		const char *secondary = "";
		if (lmode) {
			WRITE(p, " float4 s = float4(In.v_color1);\n");
			secondary = " + s";
		}

		if (gstate.textureMapEnable & 1) {
			if (doTextureProjection) {
				WRITE(p, " float4 t = tex2Dproj(tex, In.v_texcoord);\n");
			} else {
				WRITE(p, " float4 t = tex2D(tex, In.v_texcoord.xy);\n");
			}
			WRITE(p, " float4 p = In.v_color0;\n");

			if (doTextureAlpha) { // texfmt == RGBA
				switch (gstate.texfunc & 0x7) {
				case GE_TEXFUNC_MODULATE:
					WRITE(p, " float4 v = p * t%s;\n", secondary); break;
				case GE_TEXFUNC_DECAL:
					WRITE(p, " float4 v = float4(lerp(p.rgb, t.rgb, t.a), p.a)%s;\n", secondary); break;
				case GE_TEXFUNC_BLEND:
					WRITE(p, " float4 v = float4(lerp(p.rgb, u_texenv.rgb, t.rgb), p.a * t.a)%s;\n", secondary); break;
				case GE_TEXFUNC_REPLACE:
					WRITE(p, " float4 v = t%s;\n", secondary); break;
				case GE_TEXFUNC_ADD:
					WRITE(p, " float4 v = float4(p.rgb + t.rgb, p.a * t.a)%s;\n", secondary); break;
				default:
					WRITE(p, " float4 v = p;\n"); break;
				}
			} else { // texfmt == RGB
				switch (gstate.texfunc & 0x7) {
				case GE_TEXFUNC_MODULATE:
					WRITE(p, " float4 v = float4(t.rgb * p.rgb, p.a)%s;\n", secondary); break;
				case GE_TEXFUNC_DECAL:
					WRITE(p, " float4 v = float4(t.rgb, p.a)%s;\n", secondary); break;
				case GE_TEXFUNC_BLEND:
					WRITE(p, " float4 v = float4(lerp(p.rgb, u_texenv.rgb, t.rgb), p.a)%s;\n", secondary); break;
				case GE_TEXFUNC_REPLACE:
					WRITE(p, " float4 v = float4(t.rgb, p.a)%s;\n", secondary); break;
				case GE_TEXFUNC_ADD:
					WRITE(p, " float4 v = float4(p.rgb + t.rgb, p.a)%s;\n", secondary); break;
				default:
					WRITE(p, " float4 v = p;\n"); break;
				}
			}
		} else {
			// No texture mapping
			WRITE(p, " float4 v = In.v_color0 %s;\n", secondary);
			// HACK ONLY DISPLAY TEXTURE !!!
			//WRITE(p, " clip(-1);\n");
		}

		if (enableAlphaTest) {
			int alphaTestFunc = gstate.alphatest & 7;
			// Each entry is the REJECT comparison, i.e. the inverse of the pass condition.
			const char *alphaTestFuncs[] = { "#", "#", " != ", " == ", " >= ", " > ", " <= ", " < " };	// never/always don't make sense
			if (alphaTestFuncs[alphaTestFunc][0] != '#') {
				WRITE(p, " if (roundAndScaleTo255f(v.a) %s u_alphacolorref.a) clip(-1);\n", alphaTestFuncs[alphaTestFunc]);
			}
		}

		// TODO: Before or after the color test?
		if (enableColorDoubling && enableAlphaDoubling) {
			WRITE(p, " v = v * 2.0;\n");
		} else if (enableColorDoubling) {
			WRITE(p, " v.rgb = v.rgb * 2.0;\n");
		} else if (enableAlphaDoubling) {
			WRITE(p, " v.a = v.a * 2.0;\n");
		}

		if (enableColorTest) {
			int colorTestFunc = gstate.colortest & 3;
			// Per-channel REJECT comparison for each test function.
			const char *colorTestFuncs[] = { "#", "#", " != ", " == " };	// never/always don't make sense
			// How the three per-channel results combine into one reject decision.
			// The comparison is done channel by channel, so:
			//   " != " : the colors differ when ANY channel differs  -> "||"
			//   " == " : the colors match only when ALL channels match -> "&&"
			// Joining " != " with "&&" (as before) only rejected fragments whose
			// three channels all differed from the reference.
			const char *colorTestJoins[] = { "#", "#", "||", "&&" };
			if (colorTestFuncs[colorTestFunc][0] != '#') {
				const char *test = colorTestFuncs[colorTestFunc];
				const char *join = colorTestJoins[colorTestFunc];
				WRITE(p, "float3 colortest = roundAndScaleTo255v(v.rgb);\n");
				WRITE(p, "if ((colortest.r %s u_alphacolorref.r) %s (colortest.g %s u_alphacolorref.g) %s (colortest.b %s u_alphacolorref.b )) clip(-1);\n", test, join, test, join, test);
			}
		}

		if (enableFog) {
			WRITE(p, " float fogCoef = clamp(In.v_fogdepth, 0.0, 1.0);\n");
			WRITE(p, " return lerp(float4(u_fogcolor, v.a), v, fogCoef);\n");
			// WRITE(p, " v.x = v_depth;\n");
		} else {
			WRITE(p, " return v;\n");
		}
	}
	WRITE(p, "}\n");
}
#endif

View File

@ -0,0 +1,52 @@
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#pragma once
#include "Globals.h"
// Key identifying one generated fragment shader. The comparison operators make
// it usable as a key in ordered containers.
struct FragmentShaderID
{
	// Packed state bits; filled in by ComputeFragmentShaderID().
	u32 d[1];

	// A fresh ID starts out as all ones in d[0].
	FragmentShaderID() { clear(); }

	// Resets d[0] to all ones.
	void clear() { d[0] = 0xFFFFFFFF; }

	// Lexicographic ordering over the words of d.
	bool operator < (const FragmentShaderID &other) const
	{
		const size_t wordCount = sizeof(d) / sizeof(u32);
		size_t idx = 0;
		// Skip the common prefix, then decide on the first differing word.
		while (idx < wordCount && d[idx] == other.d[idx])
			++idx;
		return idx < wordCount && d[idx] < other.d[idx];
	}

	// Two IDs are equal when every word matches.
	bool operator == (const FragmentShaderID &other) const
	{
		const size_t wordCount = sizeof(d) / sizeof(u32);
		size_t idx = 0;
		while (idx < wordCount && d[idx] == other.d[idx])
			++idx;
		return idx == wordCount;
	}
};
// Fills *id from the current GE state; `id` must not be null.
void ComputeFragmentShaderID(FragmentShaderID *id);
// Writes the shader source for the current GE state into `buffer` as a
// NUL-terminated string. No size is passed, so the caller is responsible for
// providing a buffer that is large enough.
void GenerateFragmentShader(char *buffer);

View File

@ -0,0 +1,968 @@
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include "math/lin/matrix4x4.h"
#include "Core/Host.h"
#include "Core/MemMap.h"
#include "Core/Config.h"
#include "Core/System.h"
#include "GPU/ge_constants.h"
#include "GPU/GPUState.h"
#include "helper/dx_state.h"
#include "helper/fbo.h"
#include "GPU/Directx9/Framebuffer.h"
#include "GPU/Directx9/TextureCache.h"
#include "GPU/Directx9/ShaderManager.h"
// Aggressively delete unused FBO:s to save gpu memory.
// NOTE(review): presumably an age threshold counted in frames - the code that
// consumes FBO_OLD_AGE is not in this part of the file, confirm there.
enum {
FBO_OLD_AGE = 5,
};
// Compares two addresses while ignoring everything above the low 26 bits.
static bool MaskedEqual(u32 addr1, u32 addr2) {
	// XOR leaves a bit set only where the addresses differ; mask off the
	// bits that do not take part in the comparison.
	const u32 differingBits = (addr1 ^ addr2) & 0x3FFFFFF;
	return differingBits == 0;
}
// Packs a 32-bit pixel (8 bits per channel, first channel in the low byte)
// into 5:6:5, keeping the top bits of each of the three lowest bytes.
// The highest byte is dropped.
inline u16 RGBA8888toRGB565(u32 px) {
	const u32 c0 = (px >> 3) & 0x1F;    // byte 0, top 5 bits
	const u32 c1 = (px >> 10) & 0x3F;   // byte 1, top 6 bits
	const u32 c2 = (px >> 19) & 0x1F;   // byte 2, top 5 bits
	return c0 | (c1 << 5) | (c2 << 11);
}
// Packs a 32-bit pixel (8 bits per channel) into 4:4:4:4 by keeping the top
// nibble of every byte, preserving the byte order.
inline u16 RGBA8888toRGBA4444(u32 px) {
	const u32 n0 = (px >> 4) & 0xF;    // byte 0, high nibble
	const u32 n1 = (px >> 12) & 0xF;   // byte 1, high nibble
	const u32 n2 = (px >> 20) & 0xF;   // byte 2, high nibble
	const u32 n3 = (px >> 28) & 0xF;   // byte 3, high nibble
	return n0 | (n1 << 4) | (n2 << 8) | (n3 << 12);
}
// Packs a 32-bit pixel (8 bits per channel) into 5:5:5:1: the top 5 bits of
// the three lowest bytes plus the top bit of the highest byte.
inline u16 RGBA8888toRGBA5551(u32 px) {
	const u32 c0 = (px >> 3) & 0x1F;    // byte 0, top 5 bits
	const u32 c1 = (px >> 11) & 0x1F;   // byte 1, top 5 bits
	const u32 c2 = (px >> 19) & 0x1F;   // byte 2, top 5 bits
	const u32 top = (px >> 31) & 0x1;   // byte 3, top bit
	return c0 | (c1 << 5) | (c2 << 10) | (top << 15);
}
void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 stride, u32 height, GEBufferFormat format);
// Computes where an image of origW x origH should be placed inside a frame of
// frameW x frameH.
//
// Outputs (*x, *y) are the top-left corner and (*w, *h) the size, all in frame
// units. With g_Config.bStretchToDisplay the image simply fills the frame;
// otherwise the aspect ratio is preserved and the image is centered along the
// axis that has room to spare.
//
// The caller must pass non-zero origH and frameH; they are used as divisors.
void CenterRect(float *x, float *y, float *w, float *h,
				float origW, float origH, float frameW, float frameH)
{
	if (g_Config.bStretchToDisplay)
	{
		*x = 0;
		*y = 0;
		*w = frameW;
		*h = frameH;
		return;
	}

	float origRatio = origW/origH;
	float frameRatio = frameW/frameH;

	if (origRatio > frameRatio)
	{
		// Image is wider than frame. Center vertically.
		*x = 0.0f;
		*w = frameW;
		*h = frameW / origRatio;
#ifdef BLACKBERRY
		// Stretch a little bit
		if (g_Config.bPartialStretch)
			*h = (frameH + *h) / 2.0f; // (408 + 720) / 2 = 564
#endif
		*y = (frameH - *h) / 2.0f;
	}
	else
	{
		// Image is taller than frame. Center horizontally.
		*y = 0.0f;
		*h = frameH;
		*w = frameH * origRatio;
		*x = (frameW - *w) / 2.0f;
	}
}
// Sets every tracking pointer/handle to zero, clears the current render
// target once and creates the 512x272 texture that DrawPixels() uploads into.
// NOTE(review): the CreateTexture result is not checked, so drawPixelsTex_ can
// still be 0 after construction.
FramebufferManager::FramebufferManager() :
ramDisplayFramebufPtr_(0),
displayFramebufPtr_(0),
displayStride_(0),
displayFormat_(GE_FORMAT_565),
displayFramebuf_(0),
prevDisplayFramebuf_(0),
prevPrevDisplayFramebuf_(0),
frameLastFramebufUsed(0),
currentRenderVfb_(0),
drawPixelsTex_(0),
drawPixelsTexFormat_(GE_FORMAT_INVALID),
convBuf(0)
{
// Disabled OpenGL-style setup, kept for reference only.
#if 0
draw2dprogram = glsl_create_source(basic_vs, tex_fs);
glsl_bind(draw2dprogram);
glUniform1i(draw2dprogram->sampler0, 0);
glsl_unbind();
#endif
// And an initial clear. We don't clear per frame as the games are supposed to handle that
// by themselves.
// Depth writes and all color channels are enabled first so the clear is not masked.
dxstate.depthWrite.set(true);
dxstate.colorMask.set(true, true, true, true);
pD3Ddevice->Clear(0, NULL, D3DCLEAR_STENCIL|D3DCLEAR_TARGET |D3DCLEAR_ZBUFFER, D3DCOLOR_XRGB(0, 0, 0), 0, 0);
// Upload target for DrawPixels(): one mip level, 32-bit ARGB.
pD3Ddevice->CreateTexture(512, 272, 1, 0, D3DFMT(D3DFMT_A8R8G8B8), NULL, &drawPixelsTex_, NULL);
// Buffered (FBO) rendering is used for every mode except FB_NON_BUFFERED_MODE.
useBufferedRendering_ = g_Config.iRenderingMode != FB_NON_BUFFERED_MODE ? 1 : 0;
}
// Releases the pixel upload texture (if it was created) and frees the
// conversion buffer.
FramebufferManager::~FramebufferManager() {
	// The OpenGL counterpart of this cleanup was
	//   glDeleteTextures(1, &drawPixelsTex_); glsl_destroy(draw2dprogram);
	// and is not compiled in this backend.
	if (drawPixelsTex_ != NULL)
		drawPixelsTex_->Release();

	delete [] convBuf;
}
// Expands a 16-bit 4:4:4:4 pixel to 32 bits by moving each nibble into the
// high nibble of one output byte (the low nibbles stay zero).
//   nibble 0 -> bits 4-7, nibble 1 -> bits 12-15,
//   nibble 2 -> bits 20-23, nibble 3 -> bits 28-31
// NOTE(review): the 565/5551 helpers in this file send the lowest input field
// to bits 16-23 instead of bits 0-7 - confirm which channel order is intended.
static inline void ARGB8From4444(u16 c, u32 * dst) {
	const u32 n0 = c & 0xf;
	const u32 n1 = (c >> 4) & 0xf;
	const u32 n2 = (c >> 8) & 0xf;
	const u32 n3 = c >> 12;
	*dst = (n0 << 4) | (n1 << 12) | (n2 << 20) | (n3 << 28);
}
// Expands a 16-bit 5:6:5 pixel to 32 bits with the top byte forced to 0xFF.
// Field placement follows ARGB8From5551: the lowest input field lands in
// bits 16-23, the middle one in bits 8-15 and the highest one in bits 0-7,
// each in the top bits of its output byte.
//
// Input layout: bits 0-4 first channel, bits 5-10 second channel (6 bits),
// bits 11-15 third channel. The previous version used the 5551 shifts here,
// which pushed the 6-bit field one bit into the neighbouring byte and read
// the third channel from bits 10-14 instead of 11-15.
static inline void ARGB8From565(u16 c, u32 * dst) {
	const u32 lo5 = c & 0x001f;
	const u32 mid6 = (c >> 5) & 0x003f;
	const u32 hi5 = (c >> 11) & 0x001f;
	*dst = (lo5 << 19) | (mid6 << 10) | (hi5 << 3) | 0xFF000000;
}
// Expands a 16-bit 5:5:5:1 pixel to 32 bits. The three 5-bit fields go into
// the top bits of output bytes 2, 1 and 0 respectively; the 1-bit field is
// ignored and the top output byte is always 0xFF.
static inline void ARGB8From5551(u16 c, u32 * dst) {
	const u32 f0 = c & 0x001f;           // input bits 0-4
	const u32 f1 = (c >> 5) & 0x001f;    // input bits 5-9
	const u32 f2 = (c >> 10) & 0x001f;   // input bits 10-14
	*dst = 0xFF000000 | (f0 << 19) | (f1 << 11) | (f2 << 3);
}
void FramebufferManager::DrawPixels(const u8 *framebuf, GEBufferFormat pixelFormat, int linesize) {
u8 * convBuf = NULL;
D3DLOCKED_RECT rect;
drawPixelsTex_->LockRect(0, &rect, NULL, D3DLOCK_NOOVERWRITE);
convBuf = (u8*)rect.pBits;
// Final format is ARGB(directx)
// TODO: We can just change the texture format and flip some bits around instead of this.
if (pixelFormat != GE_FORMAT_8888 || linesize != 512) {
for (int y = 0; y < 272; y++) {
switch (pixelFormat) {
// not tested
case GE_FORMAT_565:
{
const u16 *src = (const u16 *)framebuf + linesize * y;
u32 *dst = (u32*)(convBuf + rect.Pitch * y);
for (int x = 0; x < 480; x++) {
u16 col0 = LE_16(src[x+0]);
ARGB8From565(col0, &dst[x + 0]);
}
}
break;
// faster
case GE_FORMAT_5551:
{
const u16 *src = (const u16 *)framebuf + linesize * y;
u32 *dst = (u32*)(convBuf + rect.Pitch * y);
for (int x = 0; x < 480; x++) {
u16 col0 = LE_16(src[x+0]);
ARGB8From5551(col0, &dst[x + 0]);
}
}
break;
// not tested
case GE_FORMAT_4444:
{
const u16 *src = (const u16 *)framebuf + linesize * y;
u32 *dst = (u32*)(convBuf + rect.Pitch * y);
for (int x = 0; x < 480; x++)
{
u16 col = LE_16(src[x]);
dst[x * 4 + 0] = (col >> 12) << 4;
dst[x * 4 + 1] = ((col >> 8) & 0xf) << 4;
dst[x * 4 + 2] = ((col >> 4) & 0xf) << 4;
dst[x * 4 + 3] = (col & 0xf) << 4;
}
}
break;
case GE_FORMAT_8888:
{
const u8 *src = framebuf + linesize * 4 * y;
u8 *dst = convBuf + rect.Pitch * y;
memcpy(dst, src, 4 * 480);
}
break;
}
}
} else {
memcpy(convBuf, framebuf, 4 * 480 * 512);
}
drawPixelsTex_->UnlockRect(0);
// D3DXSaveTextureToFile("game:\\cc.png", D3DXIFF_PNG, drawPixelsTex_, NULL);
pD3Ddevice->SetTexture(0, drawPixelsTex_);
float x, y, w, h;
CenterRect(&x, &y, &w, &h, 480.0f, 272.0f, (float)PSP_CoreParameter().pixelWidth, (float)PSP_CoreParameter().pixelHeight);
DrawActiveTexture(x, y, w, h, false, 480.0f / 512.0f);
}
// Draws the texture currently bound to stage 0 as a quad at (x, y) with size
// (w, h), given in output pixels.
//
//   flip   - swaps the top and bottom V coordinates
//   uscale - fraction of the texture width to show  (U runs from 0 to uscale)
//   vscale - fraction of the texture height to show
//
// The caller binds the texture. This function used to bind drawPixelsTex_
// itself, which replaced the FBO color texture that CopyDisplayToOutput()
// binds right before calling it; DrawPixels() binds drawPixelsTex_ on its own,
// so that path is unaffected.
void FramebufferManager::DrawActiveTexture(float x, float y, float w, float h, bool flip, float uscale, float vscale) {
	float u2 = uscale;
	// Since we're flipping, 0 is down. That's where the scale goes.
	float v1 = flip ? 1.0f : 1.0f - vscale;
	float v2 = flip ? 1.0f - vscale : 1.0f;

	// Four vertices of 5 floats each: position x, y, z followed by u, v.
	const float coord[] = {
		x,   y,   0, 0,  v1,
		x+w, y,   0, u2, v1,
		x+w, y+h, 0, u2, v2,
		x,   y+h, 0, 0,  v2
	};

	// Orthographic projection mapping output pixels to clip space, origin top-left.
	Matrix4x4 ortho;
	ortho.setOrtho(0, (float)PSP_CoreParameter().pixelWidth, (float)PSP_CoreParameter().pixelHeight, 0, -1, 1);

	//pD3Ddevice->SetRenderState(D3DRS_FILLMODE, D3DFILL_WIREFRAME);
	pD3Ddevice->SetRenderState(D3DRS_CULLMODE, D3DCULL_NONE);
	pD3Ddevice->SetVertexShaderConstantF(0, ortho.getReadPtr(), 4);
	pD3Ddevice->SetVertexDeclaration(pFramebufferVertexDecl);
	pD3Ddevice->SetPixelShader(pFramebufferPixelShader);
	pD3Ddevice->SetVertexShader(pFramebufferVertexShader);
	// A 4-vertex triangle fan is 2 primitives.
	pD3Ddevice->DrawPrimitiveUP(D3DPT_TRIANGLEFAN, 2, coord, 5 * sizeof(float));
}
// Looks up the virtual framebuffer that corresponds to the current display
// address and format. Among all candidates at least 480 wide, the one used
// most recently wins (the first such entry on ties). Returns 0 when none matches.
VirtualFramebuffer *FramebufferManager::GetDisplayFBO() {
	VirtualFramebuffer *newest = NULL;

	for (size_t i = 0; i < vfbs_.size(); ++i) {
		VirtualFramebuffer *candidate = vfbs_[i];

		if (!MaskedEqual(candidate->fb_address, displayFramebufPtr_))
			continue;
		if (candidate->format != displayFormat_ || candidate->width < 480)
			continue;

		// Could check w too but whatever
		if (newest == NULL || newest->last_frame_used < candidate->last_frame_used)
			newest = candidate;
	}

	if (newest != NULL)
		return newest;

	DEBUG_LOG(HLE, "Finding no FBO matching address %08x", displayFramebufPtr_);
#if 0 // defined(_DEBUG)
	std::string debug = "FBOs: ";
	for (size_t i = 0; i < vfbs_.size(); ++i) {
		char temp[256];
		sprintf(temp, "%08x %i %i", vfbs_[i]->fb_address, vfbs_[i]->width, vfbs_[i]->height);
		debug += std::string(temp);
	}
	ERROR_LOG(HLE, "FBOs: %s", debug.c_str());
#endif
	return 0;
}
// Derives a width and height from the viewport registers: each output is the
// absolute value of twice the corresponding viewport scale value, truncated
// to an integer.
void GetViewportDimensions(int &w, int &h) {
	const float scaleX = getFloat24(gstate.viewportx1);
	const float scaleY = getFloat24(gstate.viewporty1);

	const float fullWidth = fabsf(scaleX * 2);
	const float fullHeight = fabsf(scaleY * 2);

	w = (int)fullWidth;
	h = (int)fullHeight;
}
// Heuristics to figure out the size of FBO to create.
// Heuristic for the size of the framebuffer that is about to be rendered to.
//
// The viewport size is the starting point. With a framebuffer stride below
// 512 it is limited by the drawing region; otherwise it is raised to at least
// the 480x272 screen size.
void GuessDrawingSize(int &drawing_width, int &drawing_height) {
	const int kScreenWidth = 480;
	const int kScreenHeight = 272;

	const int regionWidth = gstate.getRegionX2() + 1;
	const int regionHeight = gstate.getRegionY2() + 1;
	const int stride = gstate.fbwidth & 0x3C0;

	int vpWidth, vpHeight;
	GetViewportDimensions(vpWidth, vpHeight);

	// A generated FBO shouldn't be greater than 512x512.
	// NOTE(review): this only triggers when BOTH dimensions exceed 512 -
	// confirm that is the intended condition.
	const bool viewportTooLarge = vpWidth > 512 && vpHeight > 512;
	if (viewportTooLarge) {
		vpWidth = kScreenWidth;
		vpHeight = kScreenHeight;
	}

	const bool wideStride = stride >= 512;
	if (wideStride) {
		drawing_width = std::max(vpWidth, kScreenWidth);
		drawing_height = std::max(vpHeight, kScreenHeight);
	} else {
		drawing_width = std::min(vpWidth, regionWidth);
		drawing_height = std::min(vpHeight, regionHeight);
	}
}
// Destroys one virtual framebuffer: tells the texture cache, frees the FBO,
// clears every manager pointer that still refers to it and deletes the object.
// `v` must not be used after this call.
void FramebufferManager::DestroyFramebuf(VirtualFramebuffer *v) {
	textureCache_->NotifyFramebufferDestroyed(v->fb_address, v);

	if (v->fbo != 0) {
		fbo_destroy(v->fbo);
		v->fbo = 0;
	}

	// Wipe some pointers
	VirtualFramebuffer **trackers[] = {
		&currentRenderVfb_,
		&displayFramebuf_,
		&prevDisplayFramebuf_,
		&prevPrevDisplayFramebuf_,
	};
	for (size_t i = 0; i < sizeof(trackers) / sizeof(trackers[0]); ++i) {
		if (*trackers[i] == v)
			*trackers[i] = 0;
	}

	delete v;
}
// Makes the framebuffer described by the current GE registers the active
// render target.
//
// Three outcomes:
//   1. no virtual framebuffer matches the address/format -> one is created,
//   2. a match exists but is not the current target       -> switch to it,
//   3. the match is already current                       -> only refresh its
//      "last used" frame stamps.
// In every case gstate_c.curRTWidth/curRTHeight are brought up to date at the end.
void FramebufferManager::SetRenderFrameBuffer() {
// Fast path: nothing changed since the last call and a target is bound.
if (!gstate_c.framebufChanged && currentRenderVfb_) {
currentRenderVfb_->last_frame_used = gpuStats.numFrames;
return;
}
gstate_c.framebufChanged = false;
// Get parameters
// Addresses are assembled from the pointer register plus bits 16-23 of the
// matching width register, shifted up to form the top address byte.
u32 fb_address = (gstate.fbptr & 0xFFE000) | ((gstate.fbwidth & 0xFF0000) << 8);
int fb_stride = gstate.fbwidth & 0x3C0;
u32 z_address = (gstate.zbptr & 0xFFE000) | ((gstate.zbwidth & 0xFF0000) << 8);
int z_stride = gstate.zbwidth & 0x3C0;
// Yeah this is not completely right. but it'll do for now.
//int drawing_width = ((gstate.region2) & 0x3FF) + 1;
//int drawing_height = ((gstate.region2 >> 10) & 0x3FF) + 1;
// As there are no clear "framebuffer width" and "framebuffer height" registers,
// we need to infer the size of the current framebuffer somehow. Let's try the viewport.
GEBufferFormat fmt = static_cast<GEBufferFormat>(gstate.framebufpixformat & 3);
int drawing_width, drawing_height;
GuessDrawingSize(drawing_width, drawing_height);
int buffer_width = drawing_width;
int buffer_height = drawing_height;
// Find a matching framebuffer
VirtualFramebuffer *vfb = 0;
for (size_t i = 0; i < vfbs_.size(); ++i) {
VirtualFramebuffer *v = vfbs_[i];
if (MaskedEqual(v->fb_address, fb_address) && v->format == fmt) {
// Let's not be so picky for now. Let's say this is the one.
vfb = v;
// Update fb stride in case it changed
vfb->fb_stride = fb_stride;
// The logical size is only updated when it still fits the allocated buffer.
if (v->bufferWidth >= drawing_width && v->bufferHeight >= drawing_height) {
v->width = drawing_width;
v->height = drawing_height;
}
break;
}
}
// Ratio between the configured render resolution and the native 480x272.
float renderWidthFactor = (float)PSP_CoreParameter().renderWidth / 480.0f;
float renderHeightFactor = (float)PSP_CoreParameter().renderHeight / 272.0f;
// None found? Create one.
if (!vfb) {
gstate_c.textureChanged = true;
vfb = new VirtualFramebuffer();
vfb->fbo = 0;
vfb->fb_address = fb_address;
vfb->fb_stride = fb_stride;
vfb->z_address = z_address;
vfb->z_stride = z_stride;
vfb->width = drawing_width;
vfb->height = drawing_height;
vfb->renderWidth = (u16)(drawing_width * renderWidthFactor);
vfb->renderHeight = (u16)(drawing_height * renderHeightFactor);
vfb->bufferWidth = buffer_width;
vfb->bufferHeight = buffer_height;
vfb->format = fmt;
vfb->usageFlags = FB_USAGE_RENDERTARGET;
vfb->dirtyAfterDisplay = true;
// Pick the FBO color depth: always 8888 with bTrueColor, otherwise follow
// the GE format (falling back to 8888 for anything unexpected).
if (g_Config.bTrueColor) {
vfb->colorDepth = FBO_8888;
} else {
switch (fmt) {
case GE_FORMAT_4444:
vfb->colorDepth = FBO_4444;
break;
case GE_FORMAT_5551:
vfb->colorDepth = FBO_5551;
break;
case GE_FORMAT_565:
vfb->colorDepth = FBO_565;
break;
case GE_FORMAT_8888:
vfb->colorDepth = FBO_8888;
break;
default:
vfb->colorDepth = FBO_8888;
break;
}
}
//#ifdef ANDROID
// vfb->colorDepth = FBO_8888;
//#endif
if (useBufferedRendering_) {
vfb->fbo = fbo_create(vfb->renderWidth, vfb->renderHeight, 1, true, vfb->colorDepth);
if (vfb->fbo) {
fbo_bind_as_render_target(vfb->fbo);
} else {
// Creation failed: the entry is still registered below, just without an FBO.
ERROR_LOG(HLE, "Error creating FBO! %i x %i", vfb->renderWidth, vfb->renderHeight);
}
} else {
fbo_unbind();
// Let's ignore rendering to targets that have not (yet) been displayed.
gstate_c.skipDrawReason |= SKIPDRAW_NON_DISPLAYED_FB;
}
textureCache_->NotifyFramebuffer(vfb->fb_address, vfb);
vfb->last_frame_used = gpuStats.numFrames;
frameLastFramebufUsed = gpuStats.numFrames;
vfbs_.push_back(vfb);
// Clear whatever target is bound now, with depth writes and all color channels enabled.
dxstate.depthWrite.set(true);
dxstate.colorMask.set(true, true, true, true);
pD3Ddevice->Clear(0, NULL, D3DCLEAR_STENCIL|D3DCLEAR_TARGET |D3DCLEAR_ZBUFFER, D3DCOLOR_XRGB(0, 0, 0), 0, 0);
currentRenderVfb_ = vfb;
INFO_LOG(HLE, "Creating FBO for %08x : %i x %i x %i", vfb->fb_address, vfb->width, vfb->height, vfb->format);
// We already have it!
} else if (vfb != currentRenderVfb_) {
// Use it as a render target.
DEBUG_LOG(HLE, "Switching render target to FBO for %08x: %i x %i x %i ", vfb->fb_address, vfb->width, vfb->height, vfb->format);
vfb->usageFlags |= FB_USAGE_RENDERTARGET;
gstate_c.textureChanged = true;
vfb->last_frame_used = gpuStats.numFrames;
frameLastFramebufUsed = gpuStats.numFrames;
vfb->dirtyAfterDisplay = true;
if (useBufferedRendering_) {
if (vfb->fbo) {
fbo_bind_as_render_target(vfb->fbo);
} else {
// wtf? This should only happen very briefly when toggling bBufferedRendering
fbo_unbind();
}
} else {
if (vfb->fbo) {
// wtf? This should only happen very briefly when toggling bBufferedRendering
textureCache_->NotifyFramebufferDestroyed(vfb->fb_address, vfb);
fbo_destroy(vfb->fbo);
vfb->fbo = 0;
}
fbo_unbind();
// Let's ignore rendering to targets that have not (yet) been displayed.
if (vfb->usageFlags & FB_USAGE_DISPLAYED_FRAMEBUFFER)
gstate_c.skipDrawReason &= ~SKIPDRAW_NON_DISPLAYED_FB;
else
gstate_c.skipDrawReason |= SKIPDRAW_NON_DISPLAYED_FB;
/*
if (drawing_width == 480 && drawing_height == 272) {
gstate_c.skipDrawReason &= ~SKIPDRAW_SKIPNONFB;
// OK!
} else {
gstate_c.skipDrawReason |= ~SKIPDRAW_SKIPNONFB;
}*/
}
textureCache_->NotifyFramebuffer(vfb->fb_address, vfb);
#if 1
// Some tiled mobile GPUs benefit IMMENSELY from clearing an FBO before rendering
// to it. This broke stuff before, so now it only clears on the first use of an
// FBO in a frame. This means that some games won't be able to avoid the on-some-GPUs
// performance-crushing framebuffer reloads from RAM, but we'll have to live with that.
// NOTE(review): last_frame_used was already set to gpuStats.numFrames earlier in
// this branch, so unless one of the calls in between changes it, this condition
// is false and the clear never runs. Enabling it would change rendering, so
// verify the intent before "fixing" the order.
if (vfb->last_frame_used != gpuStats.numFrames) {
dxstate.depthWrite.set(true);
dxstate.colorMask.set(true, true, true, true);
pD3Ddevice->Clear(0, NULL, D3DCLEAR_STENCIL|D3DCLEAR_TARGET |D3DCLEAR_ZBUFFER, D3DCOLOR_XRGB(0, 0, 0), 0, 0);
}
#endif
currentRenderVfb_ = vfb;
} else {
// Same target as before: just refresh the usage stamps.
vfb->last_frame_used = gpuStats.numFrames;
frameLastFramebufUsed = gpuStats.numFrames;
}
// ugly...
// The through-mode projection uniform is marked dirty whenever the target size changes.
if (gstate_c.curRTWidth != vfb->width || gstate_c.curRTHeight != vfb->height) {
shaderManager_->DirtyUniform(DIRTY_PROJTHROUGHMATRIX);
gstate_c.curRTWidth = vfb->width;
gstate_c.curRTHeight = vfb->height;
}
}
// Presents the current display framebuffer on the output.
//
// If a virtual framebuffer matches the display address, its FBO color texture
// is drawn centered on the output. Otherwise the image is uploaded straight
// from emulated memory (RAM display pointer first, then the display pointer),
// and if neither address is valid the output is cleared to black.
void FramebufferManager::CopyDisplayToOutput() {
	fbo_unbind();
	currentRenderVfb_ = 0;

	VirtualFramebuffer *vfb = GetDisplayFBO();
	if (vfb == NULL) {
		// The game is displaying something directly from RAM. In GTA, it's decoded video.
		if (Memory::IsValidAddress(ramDisplayFramebufPtr_)) {
			DrawPixels(Memory::GetPointer(ramDisplayFramebufPtr_), displayFormat_, displayStride_);
			return;
		}
		if (Memory::IsValidAddress(displayFramebufPtr_)) {
			DrawPixels(Memory::GetPointer(displayFramebufPtr_), displayFormat_, displayStride_);
			return;
		}

		DEBUG_LOG(HLE, "Found no FBO to display! displayFBPtr = %08x", displayFramebufPtr_);
		// No framebuffer to display! Clear to black.
		dxstate.depthWrite.set(true);
		dxstate.colorMask.set(true, true, true, true);
		pD3Ddevice->Clear(0, NULL, D3DCLEAR_STENCIL|D3DCLEAR_TARGET |D3DCLEAR_ZBUFFER, D3DCOLOR_XRGB(0, 0, 0), 0, 0);
		return;
	}

	vfb->usageFlags |= FB_USAGE_DISPLAYED_FRAMEBUFFER;
	vfb->dirtyAfterDisplay = false;

	// Shift the display history (current -> previous -> previous-previous).
	if (prevDisplayFramebuf_ != displayFramebuf_)
		prevPrevDisplayFramebuf_ = prevDisplayFramebuf_;
	if (displayFramebuf_ != vfb)
		prevDisplayFramebuf_ = displayFramebuf_;
	displayFramebuf_ = vfb;

	if (vfb->fbo) {
		// Plain full-output blit: no blending, culling, depth, scissor or stencil.
		dxstate.viewport.set(0, 0, PSP_CoreParameter().pixelWidth, PSP_CoreParameter().pixelHeight);
		DEBUG_LOG(HLE, "Displaying FBO %08x", vfb->fb_address);
		dxstate.blend.disable();
		dxstate.cullMode.set(false, false);
		dxstate.depthTest.disable();
		dxstate.scissorTest.disable();
		dxstate.stencilTest.disable();

		// Resolve
		//fbo_resolve(vfb->fbo);
		fbo_bind_color_as_texture(vfb->fbo, 0);

		// These are in the output display coordinates
		float outX, outY, outW, outH;
		CenterRect(&outX, &outY, &outW, &outH, 480.0f, 272.0f, (float)PSP_CoreParameter().pixelWidth, (float)PSP_CoreParameter().pixelHeight);

		const float uScale = 480.0f / (float)vfb->width;
		const float vScale = 272.0f / (float)vfb->height;
		DrawActiveTexture(outX, outY, outW, outH, true, uScale, vScale);

		pD3Ddevice->SetTexture(0, NULL);
	}

	if (resized_) {
		dxstate.depthWrite.set(true);
		dxstate.colorMask.set(true, true, true, true);
		pD3Ddevice->Clear(0, NULL, D3DCLEAR_STENCIL|D3DCLEAR_TARGET |D3DCLEAR_ZBUFFER, D3DCOLOR_XRGB(0, 0, 0), 0, 0);
	}
}
// Copies the contents of a rendered framebuffer back into emulated memory.
//
// vfb is blitted into a matching entry of bvfbs_ (a separate list of FBOs used
// only for blitting, created on demand at vfb's unscaled width/height) and that
// copy is then written out by PackFramebufferDirectx9_().
//
// Does nothing when buffered rendering is disabled, when vfb is null, or when
// no blit FBO could be obtained.
void FramebufferManager::ReadFramebufferToMemory(VirtualFramebuffer *vfb) {
	// This only works with buffered rendering
	if (!useBufferedRendering_ || !vfb) {
		return;
	}

	// We'll pseudo-blit framebuffers here to get a resized and flipped version of vfb.
	// For now we'll keep these on the same struct as the ones that can get displayed
	// (and blatantly copy work already done above while at it).
	VirtualFramebuffer *nvfb = 0;

	// We maintain a separate vector of framebuffer objects for blitting.
	for (size_t i = 0; i < bvfbs_.size(); ++i) {
		VirtualFramebuffer *v = bvfbs_[i];
		if (MaskedEqual(v->fb_address, vfb->fb_address) && v->format == vfb->format &&
			v->bufferWidth == vfb->bufferWidth && v->bufferHeight == vfb->bufferHeight) {
			nvfb = v;
			// Keep the cached copy's geometry in sync with the source.
			v->fb_stride = vfb->fb_stride;
			v->width = vfb->width;
			v->height = vfb->height;
			break;
		}
	}

	if (!nvfb) {
		// Create a new fbo if none was found for the size
		nvfb = new VirtualFramebuffer();
		nvfb->fbo = 0;
		nvfb->fb_address = vfb->fb_address;
		nvfb->fb_stride = vfb->fb_stride;
		nvfb->z_address = vfb->z_address;
		nvfb->z_stride = vfb->z_stride;
		nvfb->width = vfb->width;
		nvfb->height = vfb->height;
		nvfb->renderWidth = vfb->width;
		nvfb->renderHeight = vfb->height;
		nvfb->bufferWidth = vfb->bufferWidth;
		nvfb->bufferHeight = vfb->bufferHeight;
		nvfb->format = vfb->format;
		nvfb->usageFlags = FB_USAGE_RENDERTARGET;
		nvfb->dirtyAfterDisplay = true;

		if (g_Config.bTrueColor) {
			nvfb->colorDepth = FBO_8888;
		} else {
			switch (vfb->format) {
			case GE_FORMAT_4444:
				nvfb->colorDepth = FBO_4444;
				break;
			case GE_FORMAT_5551:
				nvfb->colorDepth = FBO_5551;
				break;
			case GE_FORMAT_565:
				nvfb->colorDepth = FBO_565;
				break;
			case GE_FORMAT_8888:
			default:
				nvfb->colorDepth = FBO_8888;
				break;
			}
		}

		nvfb->fbo = fbo_create(nvfb->width, nvfb->height, 1, true, nvfb->colorDepth);
		if (!nvfb->fbo) {
			ERROR_LOG(HLE, "Error creating FBO! %i x %i", nvfb->renderWidth, nvfb->renderHeight);
			// The entry was never added to bvfbs_, so nothing else owns it: free it
			// here instead of leaking it on every failed attempt.
			delete nvfb;
			fbo_unbind();
			return;
		}

		fbo_bind_as_render_target(nvfb->fbo);
		nvfb->last_frame_used = gpuStats.numFrames;
		bvfbs_.push_back(nvfb);

		dxstate.depthWrite.set(true);
		dxstate.colorMask.set(true, true, true, true);
		pD3Ddevice->Clear(0, NULL, D3DCLEAR_STENCIL|D3DCLEAR_TARGET |D3DCLEAR_ZBUFFER, D3DCOLOR_XRGB(0, 0, 0), 0, 0);
	} else {
		// Must be sampled BEFORE last_frame_used is refreshed below, otherwise the
		// comparison is always false and the clear can never run.
		const bool firstUseThisFrame = nvfb->last_frame_used != gpuStats.numFrames;

		nvfb->usageFlags |= FB_USAGE_RENDERTARGET;
		nvfb->last_frame_used = gpuStats.numFrames;
		nvfb->dirtyAfterDisplay = true;

		if (!nvfb->fbo) {
			fbo_unbind();
			return;
		}
		fbo_bind_as_render_target(nvfb->fbo);

		// Some tiled mobile GPUs benefit IMMENSELY from clearing an FBO before rendering
		// to it. This broke stuff before, so now it only clears on the first use of an
		// FBO in a frame. This means that some games won't be able to avoid the on-some-GPUs
		// performance-crushing framebuffer reloads from RAM, but we'll have to live with that.
		if (firstUseThisFrame) {
			dxstate.depthWrite.set(true);
			dxstate.colorMask.set(true, true, true, true);
			pD3Ddevice->Clear(0, NULL, D3DCLEAR_STENCIL|D3DCLEAR_TARGET |D3DCLEAR_ZBUFFER, D3DCOLOR_XRGB(0, 0, 0), 0, 0);
		}
	}

	BlitFramebuffer_(vfb, nvfb, false);
	PackFramebufferDirectx9_(nvfb);
}
// Draws the color contents of src into dst's FBO as a textured quad.
//
// flip, upscale and vscale are forwarded unchanged to DrawActiveTexture().
// Leaves no FBO bound and texture stage 0 cleared on return.
// Does nothing unless buffered rendering is active and both framebuffers have an FBO.
void FramebufferManager::BlitFramebuffer_(VirtualFramebuffer *src, VirtualFramebuffer *dst, bool flip, float upscale, float vscale) {
	// This only works with buffered rendering
	if (!useBufferedRendering_ || !src || !src->fbo) {
		return;
	}
	// Binding a missing target would dereference a null FBO.
	if (!dst || !dst->fbo) {
		return;
	}

	fbo_bind_as_render_target(dst->fbo);
	// (The GL backend verifies framebuffer completeness at this point; there is no
	// equivalent check here.)

	dxstate.viewport.set(0, 0, dst->width, dst->height);

	// Plain copy: no blending, culling, depth, scissor or stencil.
	dxstate.blend.disable();
	dxstate.cullMode.set(false, false);
	dxstate.depthTest.disable();
	dxstate.scissorTest.disable();
	dxstate.stencilTest.disable();

	fbo_bind_color_as_texture(src->fbo, 0);

	float x, y, w, h;
	CenterRect(&x, &y, &w, &h, 480.0f, 272.0f, (float)PSP_CoreParameter().pixelWidth, (float)PSP_CoreParameter().pixelHeight);
	DrawActiveTexture(x, y, w, h, flip, upscale, vscale);

	pD3Ddevice->SetTexture(0, NULL);
	fbo_unbind();
}
// TODO: SSE/NEON
// Converts `stride * height` 32-bit RGBA8888 pixels from src into the requested
// format at dst.
//
// For GE_FORMAT_8888 the data is copied verbatim (nothing is done when src == dst;
// otherwise the buffers are assumed not to overlap). For the 16-bit formats each
// pixel is converted individually, reading 32 bits and writing 16 bits per pixel.
// Any other format leaves dst untouched.
void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 stride, u32 height, GEBufferFormat format) {
	if (format == GE_FORMAT_8888) {
		if (src != dst) {
			// Here lets assume they don't intersect
			memcpy(dst, src, stride * height * 4);
		}
		return;
	}

	// But here it shouldn't matter if they do
	const int pixelCount = height * stride;
	const u32 *in = (const u32 *)src;
	u16 *out = (u16 *)dst;

	switch (format) {
	case GE_FORMAT_565: // BGR 565
		for (int p = 0; p < pixelCount; p++) {
			out[p] = RGBA8888toRGB565(in[p]);
		}
		break;

	case GE_FORMAT_5551: // ABGR 1555
		for (int p = 0; p < pixelCount; p++) {
			out[p] = RGBA8888toRGBA5551(in[p]);
		}
		break;

	case GE_FORMAT_4444: // ABGR 4444
		for (int p = 0; p < pixelCount; p++) {
			out[p] = RGBA8888toRGBA4444(in[p]);
		}
		break;

	case GE_FORMAT_8888:
		// Not possible, handled above.
		break;

	default:
		break;
	}
}
#include <xgraphics.h>
// Reads back render target 0 into `data` as linear (untiled) pixels.
// The whole body is compiled only when _XBOX is defined; on other builds this
// function does nothing and `data` is left untouched.
// `data` receives vfb->width x vfb->height pixels, written with the locked
// texture's pitch as the destination row pitch.
static void Resolve(u8* data, VirtualFramebuffer *vfb) {
#ifdef _XBOX
// Texture that backs vfb's FBO; the render target is resolved into it.
D3DTexture * rtt = (D3DTexture*)fbo_get_rtt(vfb->fbo);
pD3Ddevice->Resolve(D3DRESOLVE_RENDERTARGET0, NULL, rtt, NULL, 0, 0, NULL, 0.f, 0, NULL);
// Lock only to obtain the base pointer and pitch. Note that p.pBits is used
// below after the matching UnlockRect().
D3DLOCKED_RECT p;
rtt->LockRect(0, &p, NULL, 0);
rtt->UnlockRect(0);
// vfb->fbo->tex is tiled, so untile it while copying into data.
XGUntileTextureLevel(vfb->width, vfb->height, 0, D3DFMT_LIN_A8R8G8B8, XGTILE_NONPACKED, data, p.Pitch, NULL, p.pBits, NULL);
#endif
}
// Reads the pixels of vfb's FBO and stores them in emulated memory at
// (0x44000000 | vfb->fb_address), converting to vfb->format if needed.
//
// For GE_FORMAT_8888 the pixels are resolved straight into emulated memory.
// For 16-bit formats they are resolved into a temporary 32-bit buffer first and
// then converted. Always leaves no FBO bound on return.
void FramebufferManager::PackFramebufferDirectx9_(VirtualFramebuffer *vfb) {
	if (!useBufferedRendering_ || !vfb->fbo) {
		fbo_unbind();
		return;
	}
	fbo_bind_for_read(vfb->fbo);

	// Pixel size always 4 here because we always request RGBA8888
	const size_t bufSize = vfb->fb_stride * vfb->height * 4;
	const u32 fb_address = (0x44000000) | vfb->fb_address;

	// Validate the destination once, for both paths. The 8888 path already
	// skipped a null pointer; the conversion path wrote through it unchecked.
	u8 *dest = (u8 *)Memory::GetPointer(fb_address);
	if (!dest) {
		fbo_unbind();
		return;
	}

	const bool needsConversion = vfb->format != GE_FORMAT_8888;

	// End result may be 16-bit but we are reading 32-bit, so there may not be
	// enough space at fb_address: use a scratch buffer in that case.
	u8 *packed = needsConversion ? (u8 *)malloc(bufSize * sizeof(u8)) : dest;

	if (packed) {
		DEBUG_LOG(HLE, "Reading framebuffer to mem, bufSize = %u, packed = %p, fb_address = %08x",
			(u32)bufSize, packed, fb_address);

		Resolve(packed, vfb);

		if (needsConversion) { // If not RGBA 8888 we need to convert
			ConvertFromRGBA8888(dest, packed, vfb->fb_stride, vfb->height, vfb->format);
			free(packed);
		}
	}

	fbo_unbind();
}
// End-of-frame hook. If a resize was requested via Resized(), drops every FBO,
// resets the viewport to the current output size and clears the request.
void FramebufferManager::EndFrame() {
	if (!resized_) {
		return;
	}

	DestroyAllFBOs();
	dxstate.viewport.set(0, 0, PSP_CoreParameter().pixelWidth, PSP_CoreParameter().pixelHeight);
	resized_ = false;
}
// Called when the graphics device is lost: destroys all tracked FBOs and
// cancels any pending resize request, since the FBOs it would have dropped
// are already gone.
void FramebufferManager::DeviceLost() {
DestroyAllFBOs();
resized_ = false;
}
// Start-of-frame hook: retires stale FBOs, forgets the current render target
// and re-reads the rendering mode from the configuration.
void FramebufferManager::BeginFrame() {
	DecimateFBOs();
	currentRenderVfb_ = 0;
	// Every mode other than FB_NON_BUFFERED_MODE renders through FBOs.
	useBufferedRendering_ = (g_Config.iRenderingMode != FB_NON_BUFFERED_MODE);
}
// Records the framebuffer the game wants shown on screen.
//
// Addresses with bit 0x04000000 set are stored as the display framebuffer
// pointer and clear the RAM pointer. Any other address is logged and stored as
// the RAM display framebuffer pointer only, leaving the display pointer as is.
// Stride and format are recorded in both cases.
void FramebufferManager::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) {
	const bool inVram = (framebuf & 0x04000000) != 0;

	if (inVram) {
		ramDisplayFramebufPtr_ = 0;
		displayFramebufPtr_ = framebuf;
	} else {
		DEBUG_LOG(HLE, "Non-VRAM display framebuffer address set: %08x", framebuf);
		ramDisplayFramebufPtr_ = framebuf;
	}

	displayStride_ = stride;
	displayFormat_ = format;
}
std::vector<FramebufferInfo> FramebufferManager::GetFramebufferList() {
std::vector<FramebufferInfo> list;
for (size_t i = 0; i < vfbs_.size(); ++i) {
VirtualFramebuffer *vfb = vfbs_[i];
FramebufferInfo info;
info.fb_address = vfb->fb_address;
info.z_address = vfb->z_address;
info.format = vfb->format;
info.width = vfb->width;
info.height = vfb->height;
info.fbo = vfb->fbo;
list.push_back(info);
}
return list;
}
void FramebufferManager::DecimateFBOs() {
fbo_unbind();
currentRenderVfb_ = 0;
int num = g_Config.iFrameSkip > 0 && g_Config.iFrameSkip != 9 ? g_Config.iFrameSkip : 3;
bool skipFrame = (gpuStats.numFrames % num == 0);
bool useFramebufferToMem = g_Config.iRenderingMode != FB_BUFFERED_MODE ? 1 : 0;
for (size_t i = 0; i < vfbs_.size(); ++i) {
VirtualFramebuffer *vfb = vfbs_[i];
int age = frameLastFramebufUsed - vfb->last_frame_used;
if(useFramebufferToMem) {
// Commit framebuffers to memory
if(skipFrame && age <= FBO_OLD_AGE)
ReadFramebufferToMemory(vfb);
}
if (vfb == displayFramebuf_ || vfb == prevDisplayFramebuf_ || vfb == prevPrevDisplayFramebuf_) {
continue;
}
if (age > FBO_OLD_AGE) {
INFO_LOG(HLE, "Decimating FBO for %08x (%i x %i x %i), age %i", vfb->fb_address, vfb->width, vfb->height, vfb->format, age)
DestroyFramebuf(vfb);
vfbs_.erase(vfbs_.begin() + i--);
}
}
// Do the same for ReadFramebuffersToMemory's VFBs
for (size_t i = 0; i < bvfbs_.size(); ++i) {
VirtualFramebuffer *vfb = bvfbs_[i];
int age = frameLastFramebufUsed - vfb->last_frame_used;
if (age > FBO_OLD_AGE) {
INFO_LOG(HLE, "Decimating FBO for %08x (%i x %i x %i), age %i", vfb->fb_address, vfb->width, vfb->height, vfb->format, age)
DestroyFramebuf(vfb);
bvfbs_.erase(bvfbs_.begin() + i--);
}
}
}
void FramebufferManager::DestroyAllFBOs() {
fbo_unbind();
currentRenderVfb_ = 0;
displayFramebuf_ = 0;
prevDisplayFramebuf_ = 0;
prevPrevDisplayFramebuf_ = 0;
for (size_t i = 0; i < vfbs_.size(); ++i) {
VirtualFramebuffer *vfb = vfbs_[i];
INFO_LOG(HLE, "Destroying FBO for %08x : %i x %i x %i", vfb->fb_address, vfb->width, vfb->height, vfb->format);
DestroyFramebuf(vfb);
}
vfbs_.clear();
}
// Notifies the manager that emulated memory at addr was written.
//
// Only acts when addr (with bit 0x40000000 stripped) is the current or previous
// display framebuffer address. Matching framebuffers are marked dirty. In
// buffered mode their FBO is refreshed by drawing the memory contents into it;
// otherwise the framebuffer is destroyed. `size` is currently unused.
void FramebufferManager::UpdateFromMemory(u32 addr, int size) {
	addr &= ~0x40000000;

	// TODO: Could go through all FBOs, but probably not important?
	// TODO: Could also check for inner changes, but video is most important.
	const bool touchesDisplay = addr == DisplayFramebufAddr() || addr == PrevDisplayFramebufAddr();
	if (!touchesDisplay) {
		return;
	}

	// TODO: Deleting the FBO is a heavy hammer solution, so let's only do it if it'd help.
	if (!Memory::IsValidAddress(displayFramebufPtr_)) {
		return;
	}

	fbo_unbind();
	currentRenderVfb_ = 0;

	bool boundAnFbo = false;
	for (size_t i = 0; i < vfbs_.size(); ++i) {
		VirtualFramebuffer *vfb = vfbs_[i];
		if (!MaskedEqual(vfb->fb_address, addr)) {
			continue;
		}

		vfb->dirtyAfterDisplay = true;

		// TODO: This without the fbo_unbind() above would be better than destroying the FBO.
		// However, it doesn't seem to work for Star Ocean, at least
		if (useBufferedRendering_) {
			fbo_bind_as_render_target(vfb->fbo);
			boundAnFbo = true;
			DrawPixels(Memory::GetPointer(addr), vfb->format, vfb->fb_stride);
		} else {
			INFO_LOG(HLE, "Invalidating FBO for %08x (%i x %i x %i)", vfb->fb_address, vfb->width, vfb->height, vfb->format);
			DestroyFramebuf(vfb);
			// Step back so the element shifted into this slot is visited next.
			vfbs_.erase(vfbs_.begin() + i--);
		}
	}

	if (boundAnFbo) {
		fbo_unbind();
	}
}
// Flags that the output size changed. The flag is consumed by EndFrame(),
// which destroys all FBOs and resets the viewport.
void FramebufferManager::Resized() {
resized_ = true;
}

164
GPU/Directx9/Framebuffer.h Normal file
View File

@ -0,0 +1,164 @@
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#pragma once
#include <list>
#include "helper/fbo.h"
// Keeps track of allocated FBOs.
// Also provides facilities for drawing and later converting raw
// pixel data.
#include "../Globals.h"
#include "GPU/GPUCommon.h"
struct GLSLProgram;
class TextureCache;
// Bit flags combined in VirtualFramebuffer::usageFlags.
enum {
	FB_USAGE_DISPLAYED_FRAMEBUFFER = 1 << 0,
	FB_USAGE_RENDERTARGET = 1 << 1,
	FB_USAGE_TEXTURE = 1 << 2,
};
// Values of g_Config.iRenderingMode.
enum {
// Render without FBOs; every other mode enables buffered rendering.
FB_NON_BUFFERED_MODE = 0,
// Render through FBOs. This is the only mode in which DecimateFBOs does not
// request framebuffer read-back.
FB_BUFFERED_MODE = 1,
// NOTE(review): presumably read-back variants converting on the CPU vs the GPU;
// the framebuffer manager code does not distinguish between them - confirm.
FB_READFBOMEMORY_CPU = 2,
FB_READFBOMEMORY_GPU = 3,
};
// Describes one emulated framebuffer and the FBO (if any) that backs it.
struct VirtualFramebuffer {
// Frame counter value recorded when this framebuffer was last used;
// DecimateFBOs subtracts it from frameLastFramebufUsed to get an age.
int last_frame_used;
// Emulated addresses of the color and depth buffers.
u32 fb_address;
u32 z_address;
// Strides of the color and depth buffers.
// NOTE(review): fb_stride is multiplied by the pixel size when sizing buffers,
// so it appears to be in pixels, not bytes - confirm.
int fb_stride;
int z_stride;
// There's also a top left of the drawing region, but meh...
// width/height: The detected size of the current framebuffer.
u16 width;
u16 height;
// renderWidth/renderHeight: The actual size we render at. May be scaled to render at higher resolutions.
u16 renderWidth;
u16 renderHeight;
// bufferWidth/bufferHeight: The actual (but non scaled) size of the buffer we render to. May only be bigger than width/height.
u16 bufferWidth;
u16 bufferHeight;
// Combination of FB_USAGE_* flags.
u16 usageFlags;
GEBufferFormat format; // virtual, right now they are all RGBA8888
// Color depth the FBO was created with.
FBOColorDepth colorDepth;
// Backing FBO; may be null (callers check before binding).
FBO *fbo;
// Cleared when the framebuffer is displayed; set again when it is (re)used as
// a render target for blitting or refreshed from memory.
bool dirtyAfterDisplay;
};
// Computes a rectangle (*x, *y, *w, *h) for content of size origW x origH inside
// a frame of size frameW x frameH. Callers use the result as output display
// coordinates.
// NOTE(review): presumably preserves the aspect ratio and centers the content;
// the implementation is not visible here - confirm.
void CenterRect(float *x, float *y, float *w, float *h,
float origW, float origH, float frameW, float frameH);
class ShaderManager;
// Keeps track of the FBOs that stand in for emulated framebuffers, decides
// which one is displayed, retires unused ones, and can copy their contents
// back to emulated memory.
class FramebufferManager {
public:
FramebufferManager();
~FramebufferManager();
// Non-owning collaborators; stored as raw pointers.
void SetTextureCache(TextureCache *tc) {
textureCache_ = tc;
}
void SetShaderManager(ShaderManager *sm) {
shaderManager_ = sm;
}
// Draws raw pixel data from memory (framebuf) in the given format and line size.
void DrawPixels(const u8 *framebuf, GEBufferFormat pixelFormat, int linesize);
// Draws the currently bound texture into the rectangle (x, y, w, h).
void DrawActiveTexture(float x, float y, float w, float h, bool flip = false, float uscale = 1.0f, float vscale = 1.0f);
// Destroys every tracked FBO and clears the display/render pointers.
void DestroyAllFBOs();
// Destroys FBOs that have not been used for more than FBO_OLD_AGE frames.
void DecimateFBOs();
// Per-frame hooks.
void BeginFrame();
void EndFrame();
// Requests that all FBOs be recreated; handled in EndFrame().
void Resized();
void DeviceLost();
void CopyDisplayToOutput();
void SetRenderFrameBuffer(); // Uses parameters computed from gstate
// Notification that emulated memory at addr changed.
void UpdateFromMemory(u32 addr, int size);
// Copies vfb's contents back into emulated memory (buffered rendering only).
void ReadFramebufferToMemory(VirtualFramebuffer *vfb);
// TODO: Break out into some form of FBO manager
VirtualFramebuffer *GetDisplayFBO();
void SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format);
size_t NumVFBs() const { return vfbs_.size(); }
std::vector<FramebufferInfo> GetFramebufferList();
// Size accessors fall back to 480x272 when no render target is current.
int GetRenderWidth() const { return currentRenderVfb_ ? currentRenderVfb_->renderWidth : 480; }
int GetRenderHeight() const { return currentRenderVfb_ ? currentRenderVfb_->renderHeight : 272; }
int GetTargetWidth() const { return currentRenderVfb_ ? currentRenderVfb_->width : 480; }
int GetTargetHeight() const { return currentRenderVfb_ ? currentRenderVfb_->height : 272; }
// Address of the previous / current display framebuffer with bit 0x04000000
// set, or 0 when there is none.
u32 PrevDisplayFramebufAddr() {
return prevDisplayFramebuf_ ? (0x04000000 | prevDisplayFramebuf_->fb_address) : 0;
}
u32 DisplayFramebufAddr() {
return displayFramebuf_ ? (0x04000000 | displayFramebuf_->fb_address) : 0;
}
void DestroyFramebuf(VirtualFramebuffer *vfb);
private:
u32 ramDisplayFramebufPtr_; // workaround for MotoGP insanity
// Address, stride and format last passed to SetDisplayFramebuffer().
u32 displayFramebufPtr_;
u32 displayStride_;
GEBufferFormat displayFormat_;
// The framebuffer being displayed and the two displayed before it; these are
// exempt from decimation.
VirtualFramebuffer *displayFramebuf_;
VirtualFramebuffer *prevDisplayFramebuf_;
VirtualFramebuffer *prevPrevDisplayFramebuf_;
// Reference frame number used to compute framebuffer ages.
int frameLastFramebufUsed;
// All regular framebuffers.
std::vector<VirtualFramebuffer *> vfbs_;
// Framebuffer currently being rendered to, or null.
VirtualFramebuffer *currentRenderVfb_;
// Used by ReadFramebufferToMemory
void BlitFramebuffer_(VirtualFramebuffer *src, VirtualFramebuffer *dst, bool flip = false, float upscale = 1.0f, float vscale = 1.0f);
void PackFramebufferDirectx9_(VirtualFramebuffer *vfb);
int gpuVendor;
std::vector<VirtualFramebuffer *> bvfbs_; // blitting FBOs
// Used by DrawPixels
LPDIRECT3DTEXTURE9 drawPixelsTex_;
GEBufferFormat drawPixelsTexFormat_;
u8 *convBuf;
GLSLProgram *draw2dprogram;
TextureCache *textureCache_;
ShaderManager *shaderManager_;
// Set by Resized(), consumed by EndFrame().
bool resized_;
// Refreshed from the configuration in BeginFrame().
bool useBufferedRendering_;
};

View File

@ -0,0 +1,359 @@
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include "IndexGenerator.h"
#include "Common/Common.h"
// Maps a primitive type (used as the array index) to the primitive type the
// generated index list will be drawn as. PrimCompatible() treats two primitive
// types as batchable when they map to the same entry.
// NOTE(review): the per-entry comments assume the GE_PRIM_* constants are
// numbered 0..6 in the order AddPrim() lists them - confirm against ge_constants.h.
// Points don't need indexing...
static const u8 indexedPrimitiveType[7] = {
GE_PRIM_POINTS, // points
GE_PRIM_LINES, // lines
GE_PRIM_LINES, // line strip
GE_PRIM_TRIANGLES, // triangles
GE_PRIM_TRIANGLES, // triangle strip
GE_PRIM_TRIANGLES, // triangle fan
GE_PRIM_RECTANGLES, // rectangles
};
void IndexGenerator::Reset() {
prim_ = -1;
count_ = 0;
index_ = 0;
seenPrims_ = 0;
pureCount_ = 0;
this->inds_ = indsBase_;
}
// Returns true when primitives of type prim2 can be appended to a batch that
// so far contains prim1, i.e. both end up as the same indexed primitive type.
// prim1 == -1 means "no primitive yet" and is compatible with anything.
// Primitive types outside the lookup table are never compatible (previously
// they indexed past the end of the table).
bool IndexGenerator::PrimCompatible(int prim1, int prim2) {
	if (prim1 == -1)
		return true;

	const int numTypes = (int)(sizeof(indexedPrimitiveType) / sizeof(indexedPrimitiveType[0]));
	if (prim1 < 0 || prim1 >= numTypes || prim2 < 0 || prim2 >= numTypes)
		return false;

	return indexedPrimitiveType[prim1] == indexedPrimitiveType[prim2];
}
// Returns true when primitives of type prim can be appended to the indices
// generated so far. An empty generator (prim_ == -1) accepts anything.
// Primitive types outside the lookup table are never compatible (previously
// they indexed past the end of the table).
bool IndexGenerator::PrimCompatible(int prim) {
	if (prim_ == -1)
		return true;

	const int numTypes = (int)(sizeof(indexedPrimitiveType) / sizeof(indexedPrimitiveType[0]));
	if (prim < 0 || prim >= numTypes)
		return false;

	return indexedPrimitiveType[prim] == prim_;
}
// Attaches the generator to the caller-provided index buffer and resets it.
// The buffer is not owned; the caller must make it large enough for every
// index subsequently generated.
void IndexGenerator::Setup(u16 *inds) {
	indsBase_ = inds;
	Reset();
}
// Generates indices for vertexCount non-indexed vertices of the given primitive
// type by dispatching to the matching Add* helper. Unknown types are ignored.
void IndexGenerator::AddPrim(int prim, int vertexCount) {
	switch (prim) {
	case GE_PRIM_POINTS:
		AddPoints(vertexCount);
		break;
	case GE_PRIM_LINES:
		AddLineList(vertexCount);
		break;
	case GE_PRIM_LINE_STRIP:
		AddLineStrip(vertexCount);
		break;
	case GE_PRIM_TRIANGLES:
		AddList(vertexCount);
		break;
	case GE_PRIM_TRIANGLE_STRIP:
		AddStrip(vertexCount);
		break;
	case GE_PRIM_TRIANGLE_FAN:
		AddFan(vertexCount);
		break;
	case GE_PRIM_RECTANGLES:
		AddRectangles(vertexCount); // Same
		break;
	default:
		break;
	}
}
// Emits one index per vertex, numbered consecutively from the current base index.
void IndexGenerator::AddPoints(int numVerts) {
	const int first = index_;
	for (int v = 0; v < numVerts; ++v) {
		*inds_++ = first + v;
	}
	// ignore overflow verts
	index_ += numVerts;
	count_ += numVerts;
	prim_ = GE_PRIM_POINTS;
	seenPrims_ |= 1 << GE_PRIM_POINTS;
}
// Emits indices for a triangle list: three consecutive vertices per triangle.
// Vertices left over after the last complete triangle get no indices, but
// still advance the base index.
void IndexGenerator::AddList(int numVerts) {
	const int numTris = numVerts / 3;
	int v = index_;
	for (int t = 0; t < numTris; ++t, v += 3) {
		*inds_++ = v;
		*inds_++ = v + 1;
		*inds_++ = v + 2;
	}
	// ignore overflow verts
	index_ += numVerts;
	count_ += numTris * 3;
	prim_ = GE_PRIM_TRIANGLES;
	seenPrims_ |= 1 << GE_PRIM_TRIANGLES;
}
// Expands a triangle strip into triangle-list indices, swapping the last two
// indices of every other triangle so all triangles keep the same winding.
//
// A strip with fewer than three vertices produces no triangles. The count is
// clamped at zero so such a strip no longer subtracts from count_.
void IndexGenerator::AddStrip(int numVerts) {
	const int numTris = numVerts > 2 ? numVerts - 2 : 0;

	bool wind = false;
	for (int i = 0; i < numTris; i++) {
		*inds_++ = index_ + i;
		*inds_++ = index_ + i + (wind ? 2 : 1);
		*inds_++ = index_ + i + (wind ? 1 : 2);
		wind = !wind;
	}

	index_ += numVerts;
	count_ += numTris * 3;

	// This is so we can detect one single strip by just looking at seenPrims_.
	if (!seenPrims_) {
		seenPrims_ = 1 << GE_PRIM_TRIANGLE_STRIP;
		prim_ = GE_PRIM_TRIANGLE_STRIP;
		pureCount_ = numVerts;
	} else {
		seenPrims_ |= 1 << GE_PRIM_TRIANGLE_STRIP;
		seenPrims_ |= 1 << GE_PRIM_TRIANGLES;
		prim_ = GE_PRIM_TRIANGLES;
		pureCount_ = 0;
	}
}
// Expands a triangle fan into triangle-list indices; every triangle shares the
// first vertex of the fan.
//
// A fan with fewer than three vertices produces no triangles. The count is
// clamped at zero so such a fan no longer subtracts from count_.
void IndexGenerator::AddFan(int numVerts) {
	const int numTris = numVerts > 2 ? numVerts - 2 : 0;

	for (int i = 0; i < numTris; i++) {
		*inds_++ = index_;
		*inds_++ = index_ + i + 1;
		*inds_++ = index_ + i + 2;
	}

	index_ += numVerts;
	count_ += numTris * 3;
	prim_ = GE_PRIM_TRIANGLES;
	seenPrims_ |= 1 << GE_PRIM_TRIANGLE_FAN;
}
//Lines
// Emits indices for a line list: two consecutive vertices per line. An odd
// trailing vertex gets no indices but still advances the base index.
void IndexGenerator::AddLineList(int numVerts) {
	const int numLines = numVerts / 2;
	int v = index_;
	for (int l = 0; l < numLines; ++l, v += 2) {
		*inds_++ = v;
		*inds_++ = v + 1;
	}
	index_ += numVerts;
	count_ += numLines * 2;
	prim_ = GE_PRIM_LINES;
	seenPrims_ |= 1 << prim_;
}
// Expands a line strip into line-list indices: one line per pair of
// neighbouring vertices.
//
// A strip with fewer than two vertices produces no lines. The count is clamped
// at zero so such a strip no longer subtracts from count_.
void IndexGenerator::AddLineStrip(int numVerts) {
	const int numLines = numVerts > 1 ? numVerts - 1 : 0;

	for (int i = 0; i < numLines; i++) {
		*inds_++ = index_ + i;
		*inds_++ = index_ + i + 1;
	}

	index_ += numVerts;
	count_ += numLines * 2;
	prim_ = GE_PRIM_LINES;
	seenPrims_ |= 1 << GE_PRIM_LINE_STRIP;
}
// Emits indices for rectangles: two consecutive vertices (the two corners) per
// rectangle. An odd trailing vertex gets no indices but still advances the base index.
void IndexGenerator::AddRectangles(int numVerts) {
	const int numRects = numVerts / 2;
	int v = index_;
	for (int r = 0; r < numRects; ++r, v += 2) {
		*inds_++ = v;
		*inds_++ = v + 1;
	}
	index_ += numVerts;
	count_ += numRects * 2;
	prim_ = GE_PRIM_RECTANGLES;
	seenPrims_ |= 1 << GE_PRIM_RECTANGLES;
}
// Translates an already indexed primitive with 8-bit indices by dispatching to
// the matching Translate* helper. Unknown primitive types are ignored.
void IndexGenerator::TranslatePrim(int prim, int numInds, const u8 *inds, int indexOffset) {
	switch (prim) {
	case GE_PRIM_POINTS:
		TranslatePoints(numInds, inds, indexOffset);
		break;
	case GE_PRIM_LINES:
		TranslateLineList(numInds, inds, indexOffset);
		break;
	case GE_PRIM_LINE_STRIP:
		TranslateLineStrip(numInds, inds, indexOffset);
		break;
	case GE_PRIM_TRIANGLES:
		TranslateList(numInds, inds, indexOffset);
		break;
	case GE_PRIM_TRIANGLE_STRIP:
		TranslateStrip(numInds, inds, indexOffset);
		break;
	case GE_PRIM_TRIANGLE_FAN:
		TranslateFan(numInds, inds, indexOffset);
		break;
	case GE_PRIM_RECTANGLES:
		TranslateRectangles(numInds, inds, indexOffset); // Same
		break;
	default:
		break;
	}
}
// Translates an already indexed primitive with 16-bit indices by dispatching to
// the matching Translate* helper. Unknown primitive types are ignored.
void IndexGenerator::TranslatePrim(int prim, int numInds, const u16 *inds, int indexOffset) {
	switch (prim) {
	case GE_PRIM_POINTS:
		TranslatePoints(numInds, inds, indexOffset);
		break;
	case GE_PRIM_LINES:
		TranslateLineList(numInds, inds, indexOffset);
		break;
	case GE_PRIM_LINE_STRIP:
		TranslateLineStrip(numInds, inds, indexOffset);
		break;
	case GE_PRIM_TRIANGLES:
		TranslateList(numInds, inds, indexOffset);
		break;
	case GE_PRIM_TRIANGLE_STRIP:
		TranslateStrip(numInds, inds, indexOffset);
		break;
	case GE_PRIM_TRIANGLE_FAN:
		TranslateFan(numInds, inds, indexOffset);
		break;
	case GE_PRIM_RECTANGLES:
		TranslateRectangles(numInds, inds, indexOffset); // Same
		break;
	default:
		break;
	}
}
// Copies 8-bit point indices to the output, rebased by (index_ - indexOffset).
void IndexGenerator::TranslatePoints(int numInds, const u8 *inds, int indexOffset) {
	const int rebase = index_ - indexOffset;
	for (int i = 0; i < numInds; i++) {
		*inds_++ = rebase + inds[i];
	}
	count_ += numInds;
	prim_ = GE_PRIM_POINTS;
	seenPrims_ |= (1 << GE_PRIM_POINTS) | SEEN_INDEX8;
}
// Copies 16-bit point indices (read through u16_le) to the output, rebased by
// (index_ - indexOffset).
void IndexGenerator::TranslatePoints(int numInds, const u16 *_inds, int indexOffset) {
	const u16_le *inds = (u16_le*)_inds;
	const int rebase = index_ - indexOffset;
	for (int i = 0; i < numInds; i++) {
		*inds_++ = rebase + inds[i];
	}
	count_ += numInds;
	prim_ = GE_PRIM_POINTS;
	seenPrims_ |= (1 << GE_PRIM_POINTS) | SEEN_INDEX16;
}
void IndexGenerator::TranslateList(int numInds, const u8 *inds, int indexOffset) {
int numTris = numInds / 3;
for (int i = 0; i < numTris; i++) {
*inds_++ = index_ - indexOffset + inds[i*3];
*inds_++ = index_ - indexOffset + inds[i*3 + 1];
*inds_++ = index_ - indexOffset + inds[i*3 + 2];
}
count_ += numTris * 3;
prim_ = GE_PRIM_TRIANGLES;
seenPrims_ |= (1 << GE_PRIM_TRIANGLES) | SEEN_INDEX8;
}
// Expands an 8-bit indexed triangle strip into triangle-list indices rebased by
// (index_ - indexOffset), swapping the last two indices of every other triangle
// to keep a consistent winding.
//
// Fewer than three indices produce no triangles. The count is clamped at zero
// so such a strip no longer subtracts from count_.
void IndexGenerator::TranslateStrip(int numInds, const u8 *inds, int indexOffset) {
	const int numTris = numInds > 2 ? numInds - 2 : 0;
	const int rebase = index_ - indexOffset;

	bool wind = false;
	for (int i = 0; i < numTris; i++) {
		*inds_++ = rebase + inds[i];
		*inds_++ = rebase + inds[i + (wind ? 2 : 1)];
		*inds_++ = rebase + inds[i + (wind ? 1 : 2)];
		wind = !wind;
	}

	count_ += numTris * 3;
	prim_ = GE_PRIM_TRIANGLES;
	seenPrims_ |= (1 << GE_PRIM_TRIANGLE_STRIP) | SEEN_INDEX8;
}
// Expands an 8-bit indexed triangle fan into triangle-list indices rebased by
// (index_ - indexOffset); every triangle shares the fan's first index.
//
// Does nothing at all for numInds <= 0. With one or two indices no triangles
// are produced; the count is clamped at zero so a single index no longer
// subtracts from count_.
void IndexGenerator::TranslateFan(int numInds, const u8 *inds, int indexOffset) {
	if (numInds <= 0) return;

	const int numTris = numInds > 2 ? numInds - 2 : 0;
	const int rebase = index_ - indexOffset;

	for (int i = 0; i < numTris; i++) {
		*inds_++ = rebase + inds[0];
		*inds_++ = rebase + inds[i + 1];
		*inds_++ = rebase + inds[i + 2];
	}

	count_ += numTris * 3;
	prim_ = GE_PRIM_TRIANGLES;
	seenPrims_ |= (1 << GE_PRIM_TRIANGLE_FAN) | SEEN_INDEX8;
}
// Copies a 16-bit indexed triangle list (read through u16_le) to the output,
// rebased by (index_ - indexOffset). Indices past the last complete triangle
// are dropped.
void IndexGenerator::TranslateList(int numInds, const u16 *_inds, int indexOffset) {
	const u16_le *inds = (u16_le*)_inds;
	const int numTris = numInds / 3;
	const int rebase = index_ - indexOffset;
	for (int t = 0; t < numTris; ++t) {
		const int first = t * 3;
		*inds_++ = rebase + inds[first];
		*inds_++ = rebase + inds[first + 1];
		*inds_++ = rebase + inds[first + 2];
	}
	count_ += numTris * 3;
	prim_ = GE_PRIM_TRIANGLES;
	seenPrims_ |= (1 << GE_PRIM_TRIANGLES) | SEEN_INDEX16;
}
// Expands a 16-bit indexed triangle strip (read through u16_le) into
// triangle-list indices rebased by (index_ - indexOffset), swapping the last
// two indices of every other triangle to keep a consistent winding.
//
// Fewer than three indices produce no triangles. The count is clamped at zero
// so such a strip no longer subtracts from count_.
void IndexGenerator::TranslateStrip(int numInds, const u16 *_inds, int indexOffset) {
	const u16_le *inds = (u16_le*)_inds;
	const int numTris = numInds > 2 ? numInds - 2 : 0;
	const int rebase = index_ - indexOffset;

	bool wind = false;
	for (int i = 0; i < numTris; i++) {
		*inds_++ = rebase + inds[i];
		*inds_++ = rebase + inds[i + (wind ? 2 : 1)];
		*inds_++ = rebase + inds[i + (wind ? 1 : 2)];
		wind = !wind;
	}

	count_ += numTris * 3;
	prim_ = GE_PRIM_TRIANGLES;
	seenPrims_ |= (1 << GE_PRIM_TRIANGLE_STRIP) | SEEN_INDEX16;
}
// Expands a 16-bit indexed triangle fan (read through u16_le) into
// triangle-list indices rebased by (index_ - indexOffset); every triangle
// shares the fan's first index.
//
// Does nothing at all for numInds <= 0. With one or two indices no triangles
// are produced; the count is clamped at zero so a single index no longer
// subtracts from count_.
void IndexGenerator::TranslateFan(int numInds, const u16 *_inds, int indexOffset) {
	const u16_le *inds = (u16_le*)_inds;
	if (numInds <= 0) return;

	const int numTris = numInds > 2 ? numInds - 2 : 0;
	const int rebase = index_ - indexOffset;

	for (int i = 0; i < numTris; i++) {
		*inds_++ = rebase + inds[0];
		*inds_++ = rebase + inds[i + 1];
		*inds_++ = rebase + inds[i + 2];
	}

	count_ += numTris * 3;
	prim_ = GE_PRIM_TRIANGLES;
	seenPrims_ |= (1 << GE_PRIM_TRIANGLE_FAN) | SEEN_INDEX16;
}
void IndexGenerator::TranslateLineList(int numInds, const u8 *inds, int indexOffset) {
int numLines = numInds / 2;
for (int i = 0; i < numLines; i++) {
*inds_++ = index_ - indexOffset + inds[i*2];
*inds_++ = index_ - indexOffset + inds[i*2+1];
}
count_ += numLines * 2;
prim_ = GE_PRIM_LINES;
seenPrims_ |= (1 << GE_PRIM_LINES) | SEEN_INDEX8;
}
// Expands an 8-bit indexed line strip into line-list indices rebased by
// (index_ - indexOffset): one line per pair of neighbouring indices.
//
// Fewer than two indices produce no lines. The count is clamped at zero so an
// empty strip no longer subtracts from count_.
void IndexGenerator::TranslateLineStrip(int numInds, const u8 *inds, int indexOffset) {
	const int numLines = numInds > 1 ? numInds - 1 : 0;
	const int rebase = index_ - indexOffset;

	for (int i = 0; i < numLines; i++) {
		*inds_++ = rebase + inds[i];
		*inds_++ = rebase + inds[i + 1];
	}

	count_ += numLines * 2;
	prim_ = GE_PRIM_LINES;
	seenPrims_ |= (1 << GE_PRIM_LINE_STRIP) | SEEN_INDEX8;
}
// Copies a 16-bit indexed line list (read through u16_le) to the output,
// rebased by (index_ - indexOffset). An odd trailing index is dropped.
void IndexGenerator::TranslateLineList(int numInds, const u16 *_inds, int indexOffset) {
	const u16_le *inds = (u16_le*)_inds;
	const int numLines = numInds / 2;
	const int rebase = index_ - indexOffset;
	for (int l = 0; l < numLines; ++l) {
		const int first = l * 2;
		*inds_++ = rebase + inds[first];
		*inds_++ = rebase + inds[first + 1];
	}
	count_ += numLines * 2;
	prim_ = GE_PRIM_LINES;
	seenPrims_ |= (1 << GE_PRIM_LINES) | SEEN_INDEX16;
}
// Expands a 16-bit indexed line strip (read through u16_le) into line-list
// indices rebased by (index_ - indexOffset): one line per pair of neighbouring
// indices.
//
// Fewer than two indices produce no lines. The count is clamped at zero so an
// empty strip no longer subtracts from count_.
void IndexGenerator::TranslateLineStrip(int numInds, const u16 *_inds, int indexOffset) {
	const u16_le *inds = (u16_le*)_inds;
	const int numLines = numInds > 1 ? numInds - 1 : 0;
	const int rebase = index_ - indexOffset;

	for (int i = 0; i < numLines; i++) {
		*inds_++ = rebase + inds[i];
		*inds_++ = rebase + inds[i + 1];
	}

	count_ += numLines * 2;
	prim_ = GE_PRIM_LINES;
	seenPrims_ |= (1 << GE_PRIM_LINE_STRIP) | SEEN_INDEX16;
}
void IndexGenerator::TranslateRectangles(int numInds, const u8 *inds, int indexOffset) {
int numRects = numInds / 2;
for (int i = 0; i < numRects; i++) {
*inds_++ = index_ - indexOffset + inds[i*2];
*inds_++ = index_ - indexOffset + inds[i*2+1];
}
count_ += numRects * 2;
prim_ = GE_PRIM_RECTANGLES;
seenPrims_ |= (1 << GE_PRIM_RECTANGLES) | SEEN_INDEX8;
}
// Copies 16-bit indexed rectangles (two indices each, read through u16_le) to
// the output, rebased by (index_ - indexOffset). An odd trailing index is dropped.
void IndexGenerator::TranslateRectangles(int numInds, const u16 *_inds, int indexOffset) {
	const u16_le *inds = (u16_le*)_inds;
	const int numRects = numInds / 2;
	const int rebase = index_ - indexOffset;
	for (int r = 0; r < numRects; ++r) {
		const int first = r * 2;
		*inds_++ = rebase + inds[first];
		*inds_++ = rebase + inds[first + 1];
	}
	count_ += numRects * 2;
	prim_ = GE_PRIM_RECTANGLES;
	seenPrims_ |= (1 << GE_PRIM_RECTANGLES) | SEEN_INDEX16;
}

View File

@ -0,0 +1,99 @@
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#pragma once
#include <algorithm>
#include "CommonTypes.h"
#include "../ge_constants.h"
// Builds a single 16-bit index list out of a sequence of primitives so that
// several draws can be batched. Strips and fans are expanded into lists;
// already indexed primitives are rebased onto the running vertex index.
// The output buffer is supplied by the caller through Setup() and is not owned.
// NOTE: there is no constructor; members are uninitialized until Setup() is called.
class IndexGenerator
{
public:
// Attaches the output index buffer and resets all state.
void Setup(u16 *indexptr);
// Rewinds to the start of the output buffer and clears all counters.
void Reset();
// True if prim2 can be appended to a batch of prim1 (prim1 == -1: empty batch).
static bool PrimCompatible(int prim1, int prim2);
// True if prim can be appended to what has been generated so far.
bool PrimCompatible(int prim);
// Primitive type the generated indices should be drawn as, or -1 when empty.
int Prim() const { return prim_; }
// Generates indices for vertexCount non-indexed vertices of type prim.
void AddPrim(int prim, int vertexCount);
// Appends an already indexed primitive; each index is rebased by
// (current vertex index - indexOffset). These do not advance the vertex index.
void TranslatePrim(int prim, int numInds, const u8 *inds, int indexOffset);
void TranslatePrim(int prim, int numInds, const u16 *inds, int indexOffset);
// Advances the running vertex index without generating indices.
void Advance(int numVerts) {
index_ += numVerts;
}
void SetIndex(int ind) { index_ = ind; }
// Running vertex index (one past the highest vertex added via Add*/Advance).
int MaxIndex() const { return index_; }
// Number of indices generated so far (despite the name).
int VertexCount() const { return count_; }
bool Empty() const { return index_ == 0; }
// Bit mask of (1 << GE_PRIM_*) for every primitive type seen, plus SEEN_INDEX8/16.
int SeenPrims() const { return seenPrims_; }
// Vertex count of the strip when exactly one non-indexed strip was added, else 0.
int PureCount() const { return pureCount_; }
// True when only one kind of non-indexed primitive has been added, of a type
// that can be drawn as-is.
bool SeenOnlyPurePrims() const {
return seenPrims_ == (1 << GE_PRIM_TRIANGLES) ||
seenPrims_ == (1 << GE_PRIM_LINES) ||
seenPrims_ == (1 << GE_PRIM_POINTS) ||
seenPrims_ == (1 << GE_PRIM_TRIANGLE_STRIP);
}
private:
// Points (why index these? code simplicity)
void AddPoints(int numVerts);
// Triangles
void AddList(int numVerts);
void AddStrip(int numVerts);
void AddFan(int numVerts);
// Lines
void AddLineList(int numVerts);
void AddLineStrip(int numVerts);
// Rectangles
void AddRectangles(int numVerts);
void TranslatePoints(int numVerts, const u8 *inds, int indexOffset);
void TranslatePoints(int numVerts, const u16 *inds, int indexOffset);
// Translates already indexed lists
void TranslateLineList(int numVerts, const u8 *inds, int indexOffset);
void TranslateLineList(int numVerts, const u16 *inds, int indexOffset);
void TranslateLineStrip(int numVerts, const u8 *inds, int indexOffset);
void TranslateLineStrip(int numVerts, const u16 *inds, int indexOffset);
void TranslateRectangles(int numVerts, const u8 *inds, int indexOffset);
void TranslateRectangles(int numVerts, const u16 *inds, int indexOffset);
void TranslateList(int numVerts, const u8 *inds, int indexOffset);
void TranslateList(int numVerts, const u16 *inds, int indexOffset);
void TranslateStrip(int numVerts, const u8 *inds, int indexOffset);
void TranslateStrip(int numVerts, const u16 *inds, int indexOffset);
void TranslateFan(int numVerts, const u8 *inds, int indexOffset);
void TranslateFan(int numVerts, const u16 *inds, int indexOffset);
// Extra seenPrims_ bits recording the width of translated source indices.
enum {
SEEN_INDEX8 = 1 << 16,
SEEN_INDEX16 = 1 << 17
};
// Start of the caller-provided output buffer.
u16 *indsBase_;
// Current write position in the output buffer.
u16 *inds_;
// Running vertex index used as the base for newly generated indices.
int index_;
// Number of indices written.
int count_;
int pureCount_;
// Current output primitive type, or -1.
int prim_;
int seenPrims_;
};

View File

@ -0,0 +1,604 @@
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#ifdef _WIN32
#define SHADERLOG
#endif
#include <map>
#include "helper/global.h"
#include "math/lin/matrix4x4.h"
#include "Common/Common.h"
#include "Core/Reporting.h"
#include "GPU/GPUState.h"
#include "GPU/ge_constants.h"
#include "GPU/Directx9/ShaderManager.h"
#include "GPU/Directx9/TransformPipeline.h"
#include "UI/OnScreenDisplay.h"
#include "Framebuffer.h"
// For matrix conversions
#include <xnamath.h>
// Compiles `code` as a pixel shader. The source is kept in source_. On failure
// the object is marked failed_ and shader is set to NULL, so callers must check
// for failure before using it.
PSShader::PSShader(const char *code, bool useHWTransform)
	: failed_(false), useHWTransform_(useHWTransform) {
	source_ = code;
#ifdef SHADERLOG
	OutputDebugString(code);
#endif
	if (CompilePixelShader(code, &shader, &constant)) {
		DEBUG_LOG(G3D, "Compiled shader:\n%s\n", (const char *)code);
		return;
	}

	failed_ = true;
	shader = NULL;
}
// Releases the compiled pixel shader, if compilation succeeded.
// NOTE(review): `constant`, filled in by CompilePixelShader, is not released
// here - confirm whether it is owned and freed elsewhere.
PSShader::~PSShader() {
if (shader)
shader->Release();
}
// Compiles `code` as a vertex shader. The source is kept in source_. On failure
// the object is marked failed_ and shader is set to NULL, so callers must check
// for failure before using it.
VSShader::VSShader(const char *code, bool useHWTransform)
	: failed_(false), useHWTransform_(useHWTransform) {
	source_ = code;
#ifdef SHADERLOG
	OutputDebugString(code);
#endif
	if (CompileVertexShader(code, &shader, &constant)) {
		DEBUG_LOG(G3D, "Compiled shader:\n%s\n", (const char *)code);
		return;
	}

	failed_ = true;
	shader = NULL;
}
// Releases the compiled vertex shader, if compilation succeeded.
// NOTE(review): `constant`, filled in by CompileVertexShader, is not released
// here - confirm whether it is owned and freed elsewhere.
VSShader::~VSShader() {
if (shader)
shader->Release();
}
// Pairs a vertex shader with a pixel shader for drawing.
// Looks up and caches the constant handles both shaders expose, binds the pair on the
// device, marks every uniform dirty and finally calls use() so the state is uploaded.
//   vs, fs          - shaders to pair; not owned by this object.
//   useHWTransform  - stored in useHWTransform_ for callers to query.
LinkedShader::LinkedShader(VSShader *vs, PSShader *fs, bool useHWTransform)
	: useHWTransform_(useHWTransform), m_vs(vs), m_fs(fs), dirtyUniforms(0),
	  // Vertex attribute locations are a leftover of the GL backend; keep them at a
	  // well-defined "not present" value instead of leaving them uninitialized.
	  a_position(-1), a_color0(-1), a_color1(-1), a_texcoord(-1), a_normal(-1),
	  a_weight0123(-1), a_weight4567(-1) {
	// Log the pointers with %p: casting them to int truncates on 64-bit targets.
	INFO_LOG(G3D, "Linked shader: vs %p fs %p", (void *)vs->shader, (void *)fs->shader);

	u_tex = fs->constant->GetConstantByName(NULL, "tex");
	u_proj = vs->constant->GetConstantByName(NULL, "u_proj");
	u_proj_through = vs->constant->GetConstantByName(NULL, "u_proj_through");
	u_texenv = fs->constant->GetConstantByName(NULL, "u_texenv");
	u_fogcolor = fs->constant->GetConstantByName(NULL, "u_fogcolor");
	u_fogcoef = fs->constant->GetConstantByName(NULL, "u_fogcoef");
	u_alphacolorref = fs->constant->GetConstantByName(NULL, "u_alphacolorref");
	u_colormask = fs->constant->GetConstantByName(NULL, "u_colormask");

	// Transform
	u_view = vs->constant->GetConstantByName(NULL, "u_view");
	u_world = vs->constant->GetConstantByName(NULL, "u_world");
	u_texmtx = vs->constant->GetConstantByName(NULL, "u_texmtx");
	numBones = gstate.getNumBoneWeights();
#ifdef USE_BONE_ARRAY
	u_bone = vs->constant->GetConstantByName(NULL, "u_bone");
#else
	for (int i = 0; i < numBones; i++) {
		char name[16];
		sprintf(name, "u_bone%i", i);
		u_bone[i] = vs->constant->GetConstantByName(NULL, name);
	}
#endif

	// Lighting, texturing
	u_ambient = vs->constant->GetConstantByName(NULL, "u_ambient");
	u_matambientalpha = vs->constant->GetConstantByName(NULL, "u_matambientalpha");
	u_matdiffuse = vs->constant->GetConstantByName(NULL, "u_matdiffuse");
	u_matspecular = vs->constant->GetConstantByName(NULL, "u_matspecular");
	u_matemissive = vs->constant->GetConstantByName(NULL, "u_matemissive");
	u_uvscaleoffset = vs->constant->GetConstantByName(NULL, "u_uvscaleoffset");
	for (int i = 0; i < 4; i++) {
		char temp[64];
		sprintf(temp, "u_lightpos%i", i);
		u_lightpos[i] = vs->constant->GetConstantByName(NULL, temp);
		sprintf(temp, "u_lightdir%i", i);
		u_lightdir[i] = vs->constant->GetConstantByName(NULL, temp);
		sprintf(temp, "u_lightatt%i", i);
		u_lightatt[i] = vs->constant->GetConstantByName(NULL, temp);
		sprintf(temp, "u_lightangle%i", i);
		u_lightangle[i] = vs->constant->GetConstantByName(NULL, temp);
		sprintf(temp, "u_lightspotCoef%i", i);
		u_lightspotCoef[i] = vs->constant->GetConstantByName(NULL, temp);
		sprintf(temp, "u_lightambient%i", i);
		u_lightambient[i] = vs->constant->GetConstantByName(NULL, temp);
		sprintf(temp, "u_lightdiffuse%i", i);
		u_lightdiffuse[i] = vs->constant->GetConstantByName(NULL, temp);
		sprintf(temp, "u_lightspecular%i", i);
		u_lightspecular[i] = vs->constant->GetConstantByName(NULL, temp);
	}

	// Bind the pair right away (the GL backend called glUseProgram here).
	pD3Ddevice->SetPixelShader(fs->shader);
	pD3Ddevice->SetVertexShader(vs->shader);

	// Everything goes through the "dirty" mechanism: flag all uniforms so that
	// use() -> updateUniforms() uploads the complete state once.
	dirtyUniforms = DIRTY_ALL;
	use();
}
// Nothing to tear down here: this destructor releases neither m_vs nor m_fs.
// (The GL backend deleted its linked program at this point.)
LinkedShader::~LinkedShader()
{
}
// Utility
// Uploads the low three bytes of `color` (bits 0-7, 8-15, 16-23, in that order)
// as three floats scaled to 0..1 into the constant identified by `uniform`.
static void SetColorUniform3(LPD3DXCONSTANTTABLE constant, int uniform, u32 color) {
	const float c0 = (color & 0xFF) / 255.0f;
	const float c1 = ((color >> 8) & 0xFF) / 255.0f;
	const float c2 = ((color >> 16) & 0xFF) / 255.0f;
	const float rgb[3] = { c0, c1, c2 };
	constant->SetFloatArray(pD3Ddevice, uniform, rgb, 3);
}
// Uploads the low three bytes of `color` plus `alpha`, all scaled to 0..1,
// as a four-float constant.
static void SetColorUniform3Alpha(LPD3DXCONSTANTTABLE constant, int uniform, u32 color, u8 alpha) {
	const float c0 = (color & 0xFF) / 255.0f;
	const float c1 = ((color >> 8) & 0xFF) / 255.0f;
	const float c2 = ((color >> 16) & 0xFF) / 255.0f;
	const float a = alpha / 255.0f;
	const float rgba[4] = { c0, c1, c2, a };
	constant->SetFloatArray(pD3Ddevice, uniform, rgba, 4);
}
// Same layout as SetColorUniform3Alpha, but the components are passed unscaled
// (0 - 255 rather than 0 - 1).
static void SetColorUniform3Alpha255(LPD3DXCONSTANTTABLE constant, int uniform, u32 color, u8 alpha) {
	float rgba[4];
	rgba[0] = (float)(color & 0xFF);
	rgba[1] = (float)((color >> 8) & 0xFF);
	rgba[2] = (float)((color >> 16) & 0xFF);
	rgba[3] = (float)alpha;
	constant->SetFloatArray(pD3Ddevice, uniform, rgba, 4);
}
// Uploads the low three bytes of `color` scaled to 0..1, with `extra` passed
// through untouched as the fourth component.
static void SetColorUniform3ExtraFloat(LPD3DXCONSTANTTABLE constant, int uniform, u32 color, float extra) {
	const float c0 = (color & 0xFF) / 255.0f;
	const float c1 = ((color >> 8) & 0xFF) / 255.0f;
	const float c2 = ((color >> 16) & 0xFF) / 255.0f;
	const float packed[4] = { c0, c1, c2, extra };
	constant->SetFloatArray(pD3Ddevice, uniform, packed, 4);
}
// Expands 12 floats (four groups of three) into 16 floats (four groups of four).
// Each group is copied and padded with a fourth value: 0 for the first three
// groups, 1 for the last.
static void ConvertMatrix4x3To4x4(const float *m4x3, float *m4x4) {
	for (int group = 0; group < 4; ++group) {
		const float *src = m4x3 + group * 3;
		float *dst = m4x4 + group * 4;
		dst[0] = src[0];
		dst[1] = src[1];
		dst[2] = src[2];
		dst[3] = (group == 3) ? 1.0f : 0.0f;
	}
}
// Expands a 12-float matrix to 16 floats and uploads it as a D3DXMATRIX.
static void SetMatrix4x3(LPD3DXCONSTANTTABLE constant, int uniform, const float *m4x3) {
	float expanded[16];
	ConvertMatrix4x3To4x4(m4x3, expanded);
	D3DXMATRIX *asMatrix = (D3DXMATRIX *)expanded;
	constant->SetMatrix(pD3Ddevice, uniform, asMatrix);
}
// Makes this pair current: uploads any dirty uniforms, then binds the pixel
// and vertex shaders on the device.
// (The GL backend also enabled its vertex attribute arrays here; this code does not.)
void LinkedShader::use()
{
	updateUniforms();

	pD3Ddevice->SetPixelShader(m_fs->shader);
	pD3Ddevice->SetVertexShader(m_vs->shader);
}
// Intentionally a no-op. The GL backend disabled its vertex attribute arrays
// here; this implementation has nothing to undo.
void LinkedShader::stop()
{
}
// OpenGL clip-space depth spans -1..1 while we need 0..1.
// Post-multiplies `in` by a scale of 0.5 on Z followed by a translation of 0.5 on Z.
static void ConvertMatrices(Matrix4x4 & in) {
	Matrix4x4 halfDepthScale;
	halfDepthScale.setScaling(Vec3(1, 1, 0.5f));

	Matrix4x4 halfDepthShift;
	halfDepthShift.setTranslation(Vec3(0, 0, 0.5f));

	// Evaluated left to right: (in * scale) * shift.
	in = in * halfDepthScale * halfDepthShift;
}
// Uploads every shader constant whose DIRTY_* bit is set in dirtyUniforms and whose
// cached handle is non-zero, then clears dirtyUniforms. Returns immediately when
// nothing is dirty. Vertex-stage constants go through m_vs->constant, pixel-stage
// constants through m_fs->constant.
void LinkedShader::updateUniforms() {
if (!dirtyUniforms)
return;
// Update any dirty uniforms before we draw
if (u_proj != 0 && (dirtyUniforms & DIRTY_PROJMATRIX)) {
Matrix4x4 flippedMatrix;
memcpy(&flippedMatrix, gstate.projMatrix, 16 * sizeof(float));
// A negative viewport height/width (as recorded in gstate_c) is compensated by
// negating the matching scale and translation entries of the projection.
if (gstate_c.vpHeight < 0) {
flippedMatrix[5] = -flippedMatrix[5];
flippedMatrix[13] = -flippedMatrix[13];
}
if (gstate_c.vpWidth < 0) {
flippedMatrix[0] = -flippedMatrix[0];
flippedMatrix[12] = -flippedMatrix[12];
}
// Convert matrices !
ConvertMatrices(flippedMatrix);
m_vs->constant->SetMatrix(pD3Ddevice, u_proj, (D3DXMATRIX*)flippedMatrix.getReadPtr());
}
if (u_proj_through != 0 && (dirtyUniforms & DIRTY_PROJTHROUGHMATRIX))
{
// Through mode uses an orthographic projection sized to the current render target.
Matrix4x4 proj_through;
proj_through.setOrtho(0.0f, gstate_c.curRTWidth, gstate_c.curRTHeight, 0, 0, 1);
// Convert matrices !
ConvertMatrices(proj_through);
m_vs->constant->SetMatrix(pD3Ddevice, u_proj_through, (D3DXMATRIX*)proj_through.getReadPtr());
}
// Pixel shader constants
if (u_texenv != 0 && (dirtyUniforms & DIRTY_TEXENV)) {
SetColorUniform3(m_fs->constant, u_texenv, gstate.texenvcolor);
}
if (u_alphacolorref != 0 && (dirtyUniforms & DIRTY_ALPHACOLORREF)) {
// Passed unscaled (0 - 255), unlike the other colors.
SetColorUniform3Alpha255(m_fs->constant, u_alphacolorref, gstate.getColorTestRef(), gstate.getAlphaTestRef());
}
if (u_colormask != 0 && (dirtyUniforms & DIRTY_COLORMASK)) {
SetColorUniform3(m_fs->constant, u_colormask, gstate.colormask);
}
if (u_fogcolor != 0 && (dirtyUniforms & DIRTY_FOGCOLOR)) {
SetColorUniform3(m_fs->constant, u_fogcolor, gstate.fogcolor);
}
if (u_fogcoef != 0 && (dirtyUniforms & DIRTY_FOGCOEF)) {
const float fogcoef[2] = {
getFloat24(gstate.fog1),
getFloat24(gstate.fog2),
};
//glUniform2fv(u_fogcoef, 1, fogcoef);
m_fs->constant->SetFloatArray(pD3Ddevice, u_fogcoef, fogcoef, 2);
}
// Texturing
if (u_uvscaleoffset != 0 && (dirtyUniforms & DIRTY_UVSCALEOFFSET)) {
// Layout: { u scale, v scale, u offset, v offset }.
float uvscaleoff[4];
if (gstate.isModeThrough()) {
// We never get here because we don't use HW transform with through mode.
// Although - why don't we?
uvscaleoff[0] = gstate_c.uv.uScale / gstate_c.curTextureWidth;
uvscaleoff[1] = gstate_c.uv.vScale / gstate_c.curTextureHeight;
uvscaleoff[2] = gstate_c.uv.uOff / gstate_c.curTextureWidth;
uvscaleoff[3] = gstate_c.uv.vOff / gstate_c.curTextureHeight;
} else {
// Width/height are stored as power-of-two exponents in texsize[0].
int w = 1 << (gstate.texsize[0] & 0xf);
int h = 1 << ((gstate.texsize[0] >> 8) & 0xf);
float widthFactor = (float)w / (float)gstate_c.curTextureWidth;
float heightFactor = (float)h / (float)gstate_c.curTextureHeight;
if ((gstate.texmapmode & 3) == 0) {
// Extra scale factor selected by the vertex type's texcoord format bits.
static const float rescale[4] = {1.0f, 2*127.5f/128.f, 2*32767.5f/32768.f, 1.0f};
float factor = rescale[(gstate.vertType & GE_VTYPE_TC_MASK) >> GE_VTYPE_TC_SHIFT];
uvscaleoff[0] = gstate_c.uv.uScale * factor * widthFactor;
uvscaleoff[1] = gstate_c.uv.vScale * factor * heightFactor;
uvscaleoff[2] = gstate_c.uv.uOff * widthFactor;
uvscaleoff[3] = gstate_c.uv.vOff * heightFactor;
} else {
uvscaleoff[0] = widthFactor;
uvscaleoff[1] = heightFactor;
uvscaleoff[2] = 0.0f;
uvscaleoff[3] = 0.0f;
}
}
m_vs->constant->SetFloatArray(pD3Ddevice, u_uvscaleoffset, uvscaleoff, 4);
}
// Transform
if (u_world != 0 && (dirtyUniforms & DIRTY_WORLDMATRIX)) {
SetMatrix4x3(m_vs->constant, u_world, gstate.worldMatrix);
}
if (u_view != 0 && (dirtyUniforms & DIRTY_VIEWMATRIX)) {
SetMatrix4x3(m_vs->constant, u_view, gstate.viewMatrix);
}
if (u_texmtx != 0 && (dirtyUniforms & DIRTY_TEXMATRIX)) {
SetMatrix4x3(m_vs->constant, u_texmtx, gstate.tgenMatrix);
}
// TODO: Could even set all bones in one go if they're all dirty.
// NOTE(review): the USE_BONE_ARRAY branch below still issues OpenGL calls and has
// not been ported to the D3DX constant table.
#ifdef USE_BONE_ARRAY
if (u_bone != 0) {
float allBones[8 * 16];
bool allDirty = true;
for (int i = 0; i < numBones; i++) {
if (dirtyUniforms & (DIRTY_BONEMATRIX0 << i)) {
ConvertMatrix4x3To4x4(gstate.boneMatrix + 12 * i, allBones + 16 * i);
} else {
allDirty = false;
}
}
if (allDirty) {
// Set them all with one call
glUniformMatrix4fv(u_bone, numBones, GL_FALSE, allBones);
} else {
// Set them one by one. Could try to coalesce two in a row etc but too lazy.
for (int i = 0; i < numBones; i++) {
if (dirtyUniforms & (DIRTY_BONEMATRIX0 << i)) {
glUniformMatrix4fv(u_bone + i, 1, GL_FALSE, allBones + 16 * i);
}
}
}
}
#else
// NOTE(review): each dirty bone matrix is expanded into bonetemp, but the upload is
// commented out, so no bone matrix reaches the shader from here.
float bonetemp[16];
for (int i = 0; i < numBones; i++) {
if (dirtyUniforms & (DIRTY_BONEMATRIX0 << i)) {
ConvertMatrix4x3To4x4(gstate.boneMatrix + 12 * i, bonetemp);
//glUniformMatrix4fv(u_bone[i], 1, GL_FALSE, bonetemp);
//m_vs->constant->SetMatrix(pD3Ddevice, u_bone[i], (D3DXMATRIX*)bonetemp);
}
}
#endif
// Lighting
if (u_ambient != 0 && (dirtyUniforms & DIRTY_AMBIENT)) {
SetColorUniform3Alpha(m_vs->constant, u_ambient, gstate.ambientcolor, gstate.getAmbientA());
}
if (u_matambientalpha != 0 && (dirtyUniforms & DIRTY_MATAMBIENTALPHA)) {
SetColorUniform3Alpha(m_vs->constant, u_matambientalpha, gstate.materialambient, gstate.getMaterialAmbientA());
}
if (u_matdiffuse != 0 && (dirtyUniforms & DIRTY_MATDIFFUSE)) {
SetColorUniform3(m_vs->constant, u_matdiffuse, gstate.materialdiffuse);
}
if (u_matemissive != 0 && (dirtyUniforms & DIRTY_MATEMISSIVE)) {
SetColorUniform3(m_vs->constant,u_matemissive, gstate.materialemissive);
}
if (u_matspecular != 0 && (dirtyUniforms & DIRTY_MATSPECULAR)) {
// The specular coefficient rides along as the fourth component.
SetColorUniform3ExtraFloat(m_vs->constant,u_matspecular, gstate.materialspecular, getFloat24(gstate.materialspecularcoef));
}
// NOTE(review): per-light uploads are still the commented-out OpenGL code below, so
// DIRTY_LIGHT0..3 currently trigger no upload in this function.
/*
for (int i = 0; i < 4; i++) {
if (dirtyUniforms & (DIRTY_LIGHT0 << i)) {
if (gstate.isDirectionalLight(i)) {
// Prenormalize
float x = gstate_c.lightpos[i][0];
float y = gstate_c.lightpos[i][1];
float z = gstate_c.lightpos[i][2];
float len = sqrtf(x*x+y*y+z*z);
if (len == 0.0f)
len = 1.0f;
else
len = 1.0f / len;
float vec[3] = { x * len, y * len, z * len };
if (u_lightpos[i] != -1) glUniform3fv(u_lightpos[i], 1, vec);
} else {
if (u_lightpos[i] != -1) glUniform3fv(u_lightpos[i], 1, gstate_c.lightpos[i]);
}
if (u_lightdir[i] != -1) glUniform3fv(u_lightdir[i], 1, gstate_c.lightdir[i]);
if (u_lightatt[i] != -1) glUniform3fv(u_lightatt[i], 1, gstate_c.lightatt[i]);
if (u_lightangle[i] != -1) glUniform1f(u_lightangle[i], gstate_c.lightangle[i]);
if (u_lightspotCoef[i] != -1) glUniform1f(u_lightspotCoef[i], gstate_c.lightspotCoef[i]);
if (u_lightambient[i] != -1) glUniform3fv(u_lightambient[i], 1, gstate_c.lightColor[0][i]);
if (u_lightdiffuse[i] != -1) glUniform3fv(u_lightdiffuse[i], 1, gstate_c.lightColor[1][i]);
if (u_lightspecular[i] != -1) glUniform3fv(u_lightspecular[i], 1, gstate_c.lightColor[2][i]);
}
}
*/
// Everything that could be uploaded has been; start the next round clean.
dirtyUniforms = 0;
}
// Starts with no current shader and every uniform flagged dirty, and allocates
// the 16 KB scratch buffer that generated shader source is written into.
ShaderManager::ShaderManager()
	: lastShader(NULL),
	  globalDirty(0xFFFFFFFF),
	  shaderSwitchDirty(0)
{
	codeBuffer_ = new char[16384];
}
// Frees the shader source scratch buffer. The shader caches are not cleared here.
ShaderManager::~ShaderManager()
{
	delete [] codeBuffer_;
}
// Accumulates DIRTY_* bits; they are handed to the shaders on the next ApplyShader().
void ShaderManager::DirtyUniform(u32 what)
{
	globalDirty = globalDirty | what;
}
void ShaderManager::Clear() {
for (auto iter = linkedShaderCache.begin(); iter != linkedShaderCache.end(); ++iter) {
delete iter->ls;
}
for (auto iter = fsCache.begin(); iter != fsCache.end(); ++iter) {
delete iter->second;
}
for (auto iter = vsCache.begin(); iter != vsCache.end(); ++iter) {
delete iter->second;
}
linkedShaderCache.clear();
fsCache.clear();
vsCache.clear();
globalDirty = 0xFFFFFFFF;
lastFSID.clear();
lastVSID.clear();
DirtyShader();
}
// Public entry point for dropping all cached shaders.
// `deleteThem` is currently ignored: the shaders are always deleted.
void ShaderManager::ClearCache(bool deleteThem)
{
	Clear();
}
void ShaderManager::DirtyShader() {
// Forget the last shader ID
lastFSID.clear();
lastVSID.clear();
lastShader = 0;
globalDirty = 0xFFFFFFFF;
shaderSwitchDirty = 0;
}
void ShaderManager::EndFrame() { // disables vertex arrays
if (lastShader)
lastShader->stop();
lastShader = 0;
}
// Selects (compiling and caching on demand) the vertex/pixel shader pair that matches
// the current GPU state for primitive type `prim`, makes it current and returns it.
// When the shader IDs match the previous call, only the dirty uniforms are refreshed.
LinkedShader *ShaderManager::ApplyShader(int prim) {
	// Hand pending global dirty bits to the current shader right away; every other
	// shader receives them lazily through shaderSwitchDirty when we switch.
	if (globalDirty != 0) {
		if (lastShader != NULL) {
			lastShader->dirtyUniforms |= globalDirty;
		}
		shaderSwitchDirty |= globalDirty;
		globalDirty = 0;
	}

	bool useHWTransform = CanUseHardwareTransform(prim);

	VertexShaderID VSID;
	FragmentShaderID FSID;
	ComputeVertexShaderID(&VSID, prim, useHWTransform);
	ComputeFragmentShaderID(&FSID);

	if (lastShader != NULL) {
		if (VSID == lastVSID && FSID == lastFSID) {
			// Same shader as last time: just refresh its uniforms.
			lastShader->updateUniforms();
			return lastShader;
		}
		// There was a previous shader and we're switching away from it.
		lastShader->stop();
	}

	lastVSID = VSID;
	lastFSID = FSID;

	// --- Vertex shader: fetch from the cache or compile it. ---
	VSShader *vs = NULL;
	VSCache::iterator cachedVS = vsCache.find(VSID);
	if (cachedVS != vsCache.end()) {
		vs = cachedVS->second;
	} else {
		GenerateVertexShader(prim, codeBuffer_, useHWTransform);
		vs = new VSShader(codeBuffer_, useHWTransform);
		if (vs->Failed()) {
			ERROR_LOG(HLE, "Shader compilation failed, falling back to software transform");
			osm.Show("hardware transform error - falling back to software", 2.5f, 0xFF3030FF, -1, true);
			delete vs;

			// TODO: Look for an existing shader with the appropriate ID and use that instead of
			// generating a new one - however, that shader ID must not be used when computing the
			// linked shader ID below, because then IDs won't match next time and we'd do this
			// over and over...

			// Software transform can still work.
			GenerateVertexShader(prim, codeBuffer_, false);
			vs = new VSShader(codeBuffer_, false);
		}
		vsCache[VSID] = vs;
	}

	// --- Fragment shader: fetch from the cache or compile it. ---
	PSShader *fs = NULL;
	FSCache::iterator cachedFS = fsCache.find(FSID);
	if (cachedFS != fsCache.end()) {
		fs = cachedFS->second;
	} else {
		GenerateFragmentShader(codeBuffer_);
		fs = new PSShader(codeBuffer_, useHWTransform);
		fsCache[FSID] = fs;
	}

	// --- Linked pair: walk the whole cache so that every entry receives the
	// deferred dirty bits, remembering the matching one on the way. ---
	LinkedShader *linked = NULL;
	for (LinkedShaderCache::iterator entry = linkedShaderCache.begin(); entry != linkedShaderCache.end(); ++entry) {
		entry->ls->dirtyUniforms |= shaderSwitchDirty;
		if (entry->vs == vs && entry->fs == fs) {
			linked = entry->ls;
		}
	}
	shaderSwitchDirty = 0;

	if (linked != NULL) {
		linked->use();
	} else {
		// The constructor makes the new pair current by itself.
		linked = new LinkedShader(vs, fs, vs->UseHWTransform());
		linkedShaderCache.push_back(LinkedShaderCacheEntry(vs, fs, linked));
	}

	lastShader = linked;
	return linked;
}

View File

@ -0,0 +1,215 @@
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#pragma once
#include "base/basictypes.h"
#include "../../Globals.h"
#include <map>
#include "VertexShaderGenerator.h"
#include "FragmentShaderGenerator.h"
class PSShader;
class VSShader;
// A vertex shader / pixel shader pair together with the cached constant handles
// needed to upload state to them. Does not own m_vs or m_fs.
class LinkedShader
{
public:
LinkedShader(VSShader *vs, PSShader *fs, bool useHWTransform);
~LinkedShader();
// Uploads dirty uniforms and binds both shaders on the device.
void use();
// Counterpart of use(); currently does nothing.
void stop();
// Uploads every uniform flagged in dirtyUniforms, then clears the flags.
void updateUniforms();
// Value passed to the constructor; ShaderManager passes the vertex shader's
// UseHWTransform(), which is false after a fallback to software transform.
bool useHWTransform_;
VSShader *m_vs;
PSShader *m_fs;
// Bitmask of DIRTY_* flags still to be uploaded.
u32 dirtyUniforms;
// Pre-fetched attrs and uniforms
// NOTE(review): the a_* attribute locations are leftovers of the GL backend; the
// constructor's code that assigned them is commented out.
int a_position;
int a_color0;
int a_color1;
int a_texcoord;
int a_normal;
int a_weight0123;
int a_weight4567;
int u_tex;
int u_proj;
int u_proj_through;
int u_texenv;
int u_view;
int u_texmtx;
int u_world;
#ifdef USE_BONE_ARRAY
int u_bone; // array, size is numBones
#else
int u_bone[8];
#endif
// Number of bone weights at construction time (gstate.getNumBoneWeights()).
int numBones;
// Fragment processing inputs
int u_alphacolorref;
int u_colormask;
int u_fogcolor;
int u_fogcoef;
// Texturing
int u_uvscaleoffset;
// Lighting
int u_ambient;
int u_matambientalpha;
int u_matdiffuse;
int u_matspecular;
int u_matemissive;
// Per-light constants, one handle for each of the four lights.
int u_lightpos[4];
int u_lightdir[4];
int u_lightatt[4]; // attenuation
int u_lightangle[4]; // spotlight cone angle (cosine)
int u_lightspotCoef[4]; // spotlight dropoff
int u_lightdiffuse[4]; // diffuse color
int u_lightspecular[4]; // specular color
int u_lightambient[4]; // ambient color
};
// Bit flags naming the shader constants that are out of date and must be re-uploaded.
// They are combined into u32 masks (LinkedShader::dirtyUniforms, ShaderManager::globalDirty).
// All 32 bits are close to being used up.
enum
{
	DIRTY_PROJMATRIX = (1 << 0),
	DIRTY_PROJTHROUGHMATRIX = (1 << 1),
	DIRTY_FOGCOLOR = (1 << 2),
	DIRTY_FOGCOEF = (1 << 3),
	DIRTY_TEXENV = (1 << 4),
	DIRTY_ALPHACOLORREF = (1 << 5),
	DIRTY_COLORREF = (1 << 6),
	DIRTY_COLORMASK = (1 << 7),
	DIRTY_LIGHT0 = (1 << 8),
	DIRTY_LIGHT1 = (1 << 9),
	DIRTY_LIGHT2 = (1 << 10),
	DIRTY_LIGHT3 = (1 << 11),
	DIRTY_MATDIFFUSE = (1 << 12),
	DIRTY_MATSPECULAR = (1 << 13),
	DIRTY_MATEMISSIVE = (1 << 14),
	DIRTY_AMBIENT = (1 << 15),
	DIRTY_MATAMBIENTALPHA = (1 << 16),
	DIRTY_MATERIAL = (1 << 17),  // let's set all 4 together (emissive ambient diffuse specular). We hide specular coef in specular.a
	DIRTY_UVSCALEOFFSET = (1 << 18),  // this will be dirtied ALL THE TIME... maybe we'll need to do "last value with this shader compares"
	DIRTY_WORLDMATRIX = (1 << 21),
	DIRTY_VIEWMATRIX = (1 << 22),  // Maybe we'll fold this into projmatrix eventually
	DIRTY_TEXMATRIX = (1 << 23),
	// One flag per bone matrix; code selects bone i with (DIRTY_BONEMATRIX0 << i).
	DIRTY_BONEMATRIX0 = (1 << 24),
	DIRTY_BONEMATRIX1 = (1 << 25),
	DIRTY_BONEMATRIX2 = (1 << 26),
	DIRTY_BONEMATRIX3 = (1 << 27),
	DIRTY_BONEMATRIX4 = (1 << 28),
	DIRTY_BONEMATRIX5 = (1 << 29),
	DIRTY_BONEMATRIX6 = (1 << 30),
	// Unsigned literal: (1 << 31) would shift into the sign bit of int and yield a
	// negative enumerator instead of the plain bit 0x80000000.
	DIRTY_BONEMATRIX7 = (1u << 31),
	DIRTY_ALL = 0xFFFFFFFF
};
// Real public interface
// A compiled pixel shader together with its constant table and the source it was
// built from.
class PSShader {
public:
// Compiles `code`; check Failed() afterwards.
PSShader(const char *code, bool useHWTransform);
~PSShader();
// Source text passed to the constructor.
const std::string &source() const { return source_; }
// True when compilation failed.
bool Failed() const { return failed_; }
// The useHWTransform value this shader was created with.
bool UseHWTransform() const { return useHWTransform_; }
// Compiled shader object; NULL when compilation failed.
LPDIRECT3DPIXELSHADER9 shader;
// Constant table produced by compilation; used to look up and set constants by name.
LPD3DXCONSTANTTABLE constant;
protected:
std::string source_;
bool failed_;
bool useHWTransform_;
};
// A compiled vertex shader together with its constant table and the source it was
// built from.
class VSShader {
public:
// Compiles `code`; check Failed() afterwards.
VSShader(const char *code, bool useHWTransform);
~VSShader();
// Source text passed to the constructor.
const std::string &source() const { return source_; }
// True when compilation failed.
bool Failed() const { return failed_; }
// The useHWTransform value this shader was created with.
bool UseHWTransform() const { return useHWTransform_; }
// Compiled shader object; NULL when compilation failed.
LPDIRECT3DVERTEXSHADER9 shader;
// Constant table produced by compilation; used to look up and set constants by name.
LPD3DXCONSTANTTABLE constant;
protected:
std::string source_;
bool failed_;
bool useHWTransform_;
};
// Generates, compiles and caches vertex/pixel shaders keyed by their shader IDs, pairs
// them into LinkedShader objects, and tracks which uniforms need re-uploading.
class ShaderManager
{
public:
ShaderManager();
~ShaderManager();
void ClearCache(bool deleteThem); // TODO: deleteThem currently not respected
// Selects the shader pair for the current state and makes it current.
LinkedShader *ApplyShader(int prim);
// Forgets the last applied shader and flags all uniforms dirty.
void DirtyShader();
// Flags the given DIRTY_* bits for upload on the next ApplyShader().
void DirtyUniform(u32 what);
void EndFrame(); // disables vertex arrays
int NumVertexShaders() const { return (int)vsCache.size(); }
int NumFragmentShaders() const { return (int)fsCache.size(); }
int NumPrograms() const { return (int)linkedShaderCache.size(); }
private:
// Deletes all cached shaders and resets the tracking state.
void Clear();
// Associates a linked shader with the vertex/pixel shader pair it was built from.
struct LinkedShaderCacheEntry {
LinkedShaderCacheEntry(VSShader *vs_, PSShader *fs_, LinkedShader *ls_)
: vs(vs_), fs(fs_), ls(ls_) { }
VSShader *vs;
PSShader *fs;
LinkedShader *ls;
};
// Searched linearly in ApplyShader().
typedef std::vector<LinkedShaderCacheEntry> LinkedShaderCache;
LinkedShaderCache linkedShaderCache;
// IDs and shader of the most recent ApplyShader() call, used for the fast path.
FragmentShaderID lastFSID;
VertexShaderID lastVSID;
LinkedShader *lastShader;
// Dirty bits accumulated since the last ApplyShader().
u32 globalDirty;
// Dirty bits still owed to shaders other than the current one.
u32 shaderSwitchDirty;
// Scratch buffer that generated shader source is written into.
char *codeBuffer_;
typedef std::map<FragmentShaderID, PSShader *> FSCache;
FSCache fsCache;
typedef std::map<VertexShaderID, VSShader *> VSCache;
VSCache vsCache;
};

View File

@ -0,0 +1,370 @@
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include "StateMapping.h"
#include "GPU/Math3D.h"
#include "GPU/GPUState.h"
#include "GPU/ge_constants.h"
#include "Core/System.h"
#include "Core/Config.h"
#include "Core/Reporting.h"
#include "DisplayListInterpreter.h"
#include "ShaderManager.h"
#include "TextureCache.h"
#include "Framebuffer.h"
// Lookup tables translating PSP GE state values into Direct3D 9 enums. Each table is
// indexed directly by the raw GE value, so entry order must not change.

// Source blend factor, indexed by the GE blend function A (clamped to GE_SRCBLEND_FIXA).
static const D3DBLEND aLookup[11] = {
D3DBLEND_DESTCOLOR,
D3DBLEND_INVDESTCOLOR,
D3DBLEND_SRCALPHA,
D3DBLEND_INVSRCALPHA,
D3DBLEND_DESTALPHA,
D3DBLEND_INVDESTALPHA,
D3DBLEND_SRCALPHA, // should be 2x
D3DBLEND_INVSRCALPHA, // should be 2x
D3DBLEND_DESTALPHA, // should be 2x
D3DBLEND_INVDESTALPHA, // should be 2x - and COLOR?
D3DBLEND_BLENDFACTOR, // FIXA
};
// Destination blend factor, indexed by the GE blend function B (clamped to GE_DSTBLEND_FIXB).
static const D3DBLEND bLookup[11] = {
D3DBLEND_SRCCOLOR,
D3DBLEND_INVSRCCOLOR,
D3DBLEND_SRCALPHA,
D3DBLEND_INVSRCALPHA,
D3DBLEND_DESTALPHA,
D3DBLEND_INVDESTALPHA,
D3DBLEND_SRCALPHA, // should be 2x
D3DBLEND_INVSRCALPHA, // should be 2x
D3DBLEND_DESTALPHA, // should be 2x
D3DBLEND_INVDESTALPHA, // should be 2x
D3DBLEND_BLENDFACTOR, // FIXB
};
// Blend operation, indexed by the GE blend equation. Only six entries are defined.
static const D3DBLENDOP eqLookup[] = {
D3DBLENDOP_ADD,
D3DBLENDOP_SUBTRACT,
D3DBLENDOP_REVSUBTRACT,
D3DBLENDOP_MIN,
D3DBLENDOP_MAX,
D3DBLENDOP_ADD, // should be abs(diff)
};
// NOTE(review): not referenced by the code visible in this file section.
static const D3DCULL cullingMode[] = {
D3DCULL_CW,
D3DCULL_CCW,
};
// Comparison function, indexed by the GE depth test function and also by the GE
// stencil test function.
static const D3DCMPFUNC ztests[] = {
D3DCMP_NEVER, D3DCMP_ALWAYS, D3DCMP_EQUAL, D3DCMP_NOTEQUAL,
D3DCMP_LESS, D3DCMP_LESSEQUAL, D3DCMP_GREATER, D3DCMP_GREATEREQUAL,
};
// Stencil operation, indexed by the GE stencil op values (sfail / zfail / zpass).
static const D3DSTENCILOP stencilOps[] = {
D3DSTENCILOP_KEEP,
D3DSTENCILOP_ZERO,
D3DSTENCILOP_REPLACE,
D3DSTENCILOP_INVERT,
D3DSTENCILOP_INCR,
D3DSTENCILOP_DECR, // don't know if these should be wrap or not
D3DSTENCILOP_KEEP, // reserved
D3DSTENCILOP_KEEP, // reserved
};
// Maps a fixed blend color to a constant blend factor where possible:
// D3DBLEND_ONE for (nearly) white, D3DBLEND_ZERO for (nearly) black, and
// D3DBLEND_UNK for anything in between.
static u32 blendColor2Func(u32 fix) {
	// Exact matches need no float conversion.
	switch (fix) {
	case 0xFFFFFF:
		return D3DBLEND_ONE;
	case 0:
		return D3DBLEND_ZERO;
	default:
		break;
	}

	Vec3f rgb = Vec3f::FromRGB(fix);
	const bool nearWhite = rgb.x >= 0.99 && rgb.y >= 0.99 && rgb.z >= 0.99;
	if (nearWhite)
		return D3DBLEND_ONE;

	const bool nearBlack = rgb.x <= 0.01 && rgb.y <= 0.01 && rgb.z <= 0.01;
	if (nearBlack)
		return D3DBLEND_ZERO;

	return D3DBLEND_UNK;
}
// True when every component of `a` is within `margin` of the matching component of `b`.
static bool blendColorSimilar(Vec3f a, Vec3f b, float margin = 0.1f) {
	Vec3f delta = a - b;
	return fabsf(delta.x) <= margin
		&& fabsf(delta.y) <= margin
		&& fabsf(delta.z) <= margin;
}
void TransformDrawEngine::ApplyDrawState(int prim) {
// TODO: All this setup is soon so expensive that we'll need dirty flags, or simply do it in the command writes where we detect dirty by xoring. Silly to do all this work on every drawcall.
if (gstate_c.textureChanged) {
if (gstate.isTextureMapEnabled()) {
textureCache_->SetTexture();
}
gstate_c.textureChanged = false;
}
// TODO: The top bit of the alpha channel should be written to the stencil bit somehow. This appears to require very expensive multipass rendering :( Alternatively, one could do a
// single fullscreen pass that converts alpha to stencil (or 2 passes, to set both the 0 and 1 values) very easily.
// Set blend
bool wantBlend = !gstate.isModeClear() && gstate.isAlphaBlendEnabled();
dxstate.blend.set(wantBlend);
if (wantBlend) {
// This can't be done exactly as there are several PSP blend modes that are impossible to do on OpenGL ES 2.0, and some even on regular OpenGL for desktop.
// HOWEVER - we should be able to approximate the 2x modes in the shader, although they will clip wrongly.
// Examples of seen unimplementable blend states:
// Mortal Kombat Unchained: FixA=0000ff FixB=000080 FuncA=10 FuncB=10
int blendFuncA = gstate.getBlendFuncA();
int blendFuncB = gstate.getBlendFuncB();
int blendFuncEq = gstate.getBlendEq();
if (blendFuncA > GE_SRCBLEND_FIXA) blendFuncA = GE_SRCBLEND_FIXA;
if (blendFuncB > GE_DSTBLEND_FIXB) blendFuncB = GE_DSTBLEND_FIXB;
// Shortcut by using D3DBLEND_ONE where possible, no need to set blendcolor
u32 glBlendFuncA = blendFuncA == GE_SRCBLEND_FIXA ? blendColor2Func(gstate.getFixA()) : aLookup[blendFuncA];
u32 glBlendFuncB = blendFuncB == GE_DSTBLEND_FIXB ? blendColor2Func(gstate.getFixB()) : bLookup[blendFuncB];
if (blendFuncA == GE_SRCBLEND_FIXA || blendFuncB == GE_DSTBLEND_FIXB) {
Vec3f fixA = Vec3f::FromRGB(gstate.getFixA());
Vec3f fixB = Vec3f::FromRGB(gstate.getFixB());
if (glBlendFuncA == D3DBLEND_UNK && glBlendFuncB != D3DBLEND_UNK) {
// Can use blendcolor trivially.
const float blendColor[4] = {fixA.x, fixA.y, fixA.z, 1.0f};
dxstate.blendColor.set(blendColor);
glBlendFuncA = D3DBLEND_BLENDFACTOR;
} else if (glBlendFuncA != D3DBLEND_UNK && glBlendFuncB == D3DBLEND_UNK) {
// Can use blendcolor trivially.
const float blendColor[4] = {fixB.x, fixB.y, fixB.z, 1.0f};
dxstate.blendColor.set(blendColor);
glBlendFuncB = D3DBLEND_BLENDFACTOR;
} else if (glBlendFuncA == D3DBLEND_UNK && glBlendFuncB == D3DBLEND_UNK) {
if (blendColorSimilar(fixA, Vec3f::AssignToAll(1.0f) - fixB)) {
glBlendFuncA = D3DBLEND_BLENDFACTOR;
glBlendFuncB = D3DBLEND_INVBLENDFACTOR;
const float blendColor[4] = {fixA.x, fixA.y, fixA.z, 1.0f};
dxstate.blendColor.set(blendColor);
} else if (blendColorSimilar(fixA, fixB)) {
glBlendFuncA = D3DBLEND_BLENDFACTOR;
glBlendFuncB = D3DBLEND_BLENDFACTOR;
const float blendColor[4] = {fixA.x, fixA.y, fixA.z, 1.0f};
dxstate.blendColor.set(blendColor);
} else {
static bool didReportBlend = false;
if (!didReportBlend)
Reporting::ReportMessage("ERROR INVALID blendcolorstate: FixA=%06x FixB=%06x FuncA=%i FuncB=%i", gstate.getFixA(), gstate.getFixB(), gstate.getBlendFuncA(), gstate.getBlendFuncB());
didReportBlend = true;
DEBUG_LOG(HLE, "ERROR INVALID blendcolorstate: FixA=%06x FixB=%06x FuncA=%i FuncB=%i", gstate.getFixA(), gstate.getFixB(), gstate.getBlendFuncA(), gstate.getBlendFuncB());
// Let's approximate, at least. Close is better than totally off.
const bool nearZeroA = blendColorSimilar(fixA, Vec3f::AssignToAll(0.0f), 0.25f);
const bool nearZeroB = blendColorSimilar(fixB, Vec3f::AssignToAll(0.0f), 0.25f);
if (nearZeroA || blendColorSimilar(fixA, Vec3f::AssignToAll(1.0f), 0.25f)) {
glBlendFuncA = nearZeroA ? D3DBLEND_ZERO : D3DBLEND_ONE;
glBlendFuncB = D3DBLEND_BLENDFACTOR;
const float blendColor[4] = {fixB.x, fixB.y, fixB.z, 1.0f};
dxstate.blendColor.set(blendColor);
// We need to pick something. Let's go with A as the fixed color.
} else {
glBlendFuncA = D3DBLEND_BLENDFACTOR;
glBlendFuncB = nearZeroB ? D3DBLEND_ZERO : D3DBLEND_ONE;
const float blendColor[4] = {fixA.x, fixA.y, fixA.z, 1.0f};
dxstate.blendColor.set(blendColor);
}
}
}
}
// At this point, through all paths above, glBlendFuncA and glBlendFuncB will be set right somehow.
dxstate.blendFunc.set(glBlendFuncA, glBlendFuncB);
dxstate.blendEquation.set(eqLookup[blendFuncEq]);
}
// Set Dither
if (gstate.isDitherEnabled()) {
dxstate.dither.enable();
dxstate.dither.set(true);
} else
dxstate.dither.disable();
// Set ColorMask/Stencil/Depth
if (gstate.isModeClear()) {
// Set Cull
dxstate.cullMode.set(false, false);
// Depth Test
bool depthMask = (gstate.clearmode >> 10) & 1;
dxstate.depthTest.enable();
dxstate.depthFunc.set(D3DCMP_ALWAYS);
dxstate.depthWrite.set(depthMask);
// Color Test
bool colorMask = (gstate.clearmode >> 8) & 1;
bool alphaMask = (gstate.clearmode >> 9) & 1;
dxstate.colorMask.set(colorMask, colorMask, colorMask, alphaMask);
// Stencil Test
if (alphaMask) {
dxstate.stencilTest.enable();
dxstate.stencilOp.set(D3DSTENCILOP_REPLACE, D3DSTENCILOP_REPLACE, D3DSTENCILOP_REPLACE);
dxstate.stencilFunc.set(D3DCMP_ALWAYS, 0, 0xFF);
} else {
dxstate.depthTest.disable();
}
} else {
// Set cull
bool wantCull = !gstate.isModeThrough() && prim != GE_PRIM_RECTANGLES && gstate.isCullEnabled();
dxstate.cullMode.set(wantCull, gstate.getCullMode());
// Depth Test
if (gstate.isDepthTestEnabled()) {
dxstate.depthTest.enable();
dxstate.depthFunc.set(ztests[gstate.getDepthTestFunc()]);
dxstate.depthWrite.set(gstate.isDepthWriteEnabled());
} else
dxstate.depthTest.disable();
// PSP color/alpha mask is per bit but we can only support per byte.
// But let's do that, at least. And let's try a threshold.
bool rmask = (gstate.pmskc & 0xFF) < 128;
bool gmask = ((gstate.pmskc >> 8) & 0xFF) < 128;
bool bmask = ((gstate.pmskc >> 16) & 0xFF) < 128;
bool amask = (gstate.pmska & 0xFF) < 128;
dxstate.colorMask.set(rmask, gmask, bmask, amask);
// Stencil Test
if (gstate.isStencilTestEnabled()) {
dxstate.stencilTest.enable();
dxstate.stencilFunc.set(ztests[gstate.getStencilTestFunction()],
gstate.getStencilTestRef(),
gstate.getStencilTestMask());
dxstate.stencilOp.set(stencilOps[gstate.getStencilOpSFail()], // stencil fail
stencilOps[gstate.getStencilOpZFail()], // depth fail
stencilOps[gstate.getStencilOpZPass()]); // depth pass
} else {
dxstate.stencilTest.disable();
}
}
float renderWidthFactor, renderHeightFactor;
float renderWidth, renderHeight;
float renderX, renderY;
bool useBufferedRendering = g_Config.iRenderingMode != 0 ? 1 : 0;
if (useBufferedRendering) {
renderX = 0.0f;
renderY = 0.0f;
renderWidth = framebufferManager_->GetRenderWidth();
renderHeight = framebufferManager_->GetRenderHeight();
renderWidthFactor = (float)renderWidth / framebufferManager_->GetTargetWidth();
renderHeightFactor = (float)renderHeight / framebufferManager_->GetTargetHeight();
} else {
// TODO: Aspect-ratio aware and centered
float pixelW = PSP_CoreParameter().pixelWidth;
float pixelH = PSP_CoreParameter().pixelHeight;
CenterRect(&renderX, &renderY, &renderWidth, &renderHeight, 480, 272, pixelW, pixelH);
renderWidthFactor = renderWidth / 480.0f;
renderHeightFactor = renderHeight / 272.0f;
}
bool throughmode = (gstate.vertType & GE_VTYPE_THROUGH_MASK) != 0;
// Scissor
int scissorX1 = (gstate.getScissorX1());
int scissorY1 = (gstate.getScissorY1());
int scissorX2 = (gstate.getScissorX2());
int scissorY2 = (gstate.getScissorY2());
// This is a bit of a hack as the render buffer isn't always that size
if (scissorX1 == 0 && scissorY1 == 0
&& scissorX2 >= (int) (gstate_c.curRTWidth - 1)
&& scissorY2 >= (int) (gstate_c.curRTHeight - 1)) {
dxstate.scissorTest.disable();
} else {
dxstate.scissorTest.enable();
dxstate.scissorRect.set(
renderX + scissorX1 * renderWidthFactor,
renderY + scissorY1 * renderHeightFactor,
renderY + scissorX2 * renderWidthFactor,
renderY + scissorY2 * renderHeightFactor);
}
/*
int regionX1 = gstate.region1 & 0x3FF;
int regionY1 = (gstate.region1 >> 10) & 0x3FF;
int regionX2 = (gstate.region2 & 0x3FF) + 1;
int regionY2 = ((gstate.region2 >> 10) & 0x3FF) + 1;
*/
int regionX1 = 0;
int regionY1 = 0;
int regionX2 = gstate_c.curRTWidth;
int regionY2 = gstate_c.curRTHeight;
float offsetX = (float)(gstate.offsetx & 0xFFFF) / 16.0f;
float offsetY = (float)(gstate.offsety & 0xFFFF) / 16.0f;
if (throughmode) {
// No viewport transform here. Let's experiment with using region.
dxstate.viewport.set(
renderX + (0 + regionX1) * renderWidthFactor,
renderY + (0 - regionY1) * renderHeightFactor,
(regionX2 - regionX1) * renderWidthFactor,
(regionY2 - regionY1) * renderHeightFactor,
0.f, 1.f);
} else {
// These we can turn into a glViewport call, offset by offsetX and offsetY. Math after.
float vpXa = getFloat24(gstate.viewportx1);
float vpXb = getFloat24(gstate.viewportx2);
float vpYa = getFloat24(gstate.viewporty1);
float vpYb = getFloat24(gstate.viewporty2);
// The viewport transform appears to go like this:
// Xscreen = -offsetX + vpXb + vpXa * Xview
// Yscreen = -offsetY + vpYb + vpYa * Yview
// Zscreen = vpZb + vpZa * Zview
// This means that to get the analogue glViewport we must:
float vpX0 = vpXb - offsetX - vpXa;
float vpY0 = vpYb - offsetY + vpYa; // Need to account for sign of Y
gstate_c.vpWidth = vpXa * 2.0f;
gstate_c.vpHeight = -vpYa * 2.0f;
float vpWidth = fabsf(gstate_c.vpWidth);
float vpHeight = fabsf(gstate_c.vpHeight);
vpX0 *= renderWidthFactor;
vpY0 *= renderHeightFactor;
vpWidth *= renderWidthFactor;
vpHeight *= renderHeightFactor;
vpX0 = (vpXb - offsetX - fabsf(vpXa)) * renderWidthFactor;
// Flip vpY0 to match the OpenGL coordinate system.
vpY0 = renderHeight - (vpYb - offsetY + fabsf(vpYa)) * renderHeightFactor;
// Sadly, as glViewport takes integers, we will not be able to support sub pixel offsets this way. But meh.
// shaderManager_->DirtyUniform(DIRTY_PROJMATRIX);
float zScale = getFloat24(gstate.viewportz1) / 65535.0f;
float zOff = getFloat24(gstate.viewportz2) / 65535.0f;
float depthRangeMin = zOff - zScale;
float depthRangeMax = zOff + zScale;
dxstate.viewport.set(vpX0 + renderX, vpY0 + renderY, vpWidth, vpHeight, depthRangeMin, depthRangeMax);
}
}

View File

@ -0,0 +1,5 @@
#pragma once
#include "helper/global.h"
#include "helper/dx_state.h"
//#include "../native/gfx/gl_common.h"

File diff suppressed because it is too large Load Diff

151
GPU/Directx9/TextureCache.h Normal file
View File

@ -0,0 +1,151 @@
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#pragma once
#include "../Globals.h"
#include "helper/global.h"
#include "helper/fbo.h"
#include "GPU/GPUInterface.h"
#include "GPU/GPUState.h"
#include "TextureScaler.h"
struct VirtualFramebuffer;
// Texture filtering mode selector.
// NOTE(review): the code consuming these values is not visible in this header;
// presumably they mirror a user-facing setting (LINEARFMV looking like
// "linear, for video only") — confirm against the caller.
enum TextureFiltering {
AUTO = 1,
NEAREST = 2,
LINEAR = 3,
LINEARFMV = 4,
};
// Texture cache for the Direct3D 9 backend. Entries live in std::map
// containers keyed by a 64-bit value and each holds an LPDIRECT3DTEXTURE9.
// NOTE(review): only declarations are visible here; the per-method notes
// below are inferred from names and signatures — confirm against the .cpp.
class TextureCache
{
public:
TextureCache();
~TextureCache();
// NOTE(review): presumably binds the texture selected by the current GPU state.
void SetTexture();
void Clear(bool delete_them);
void StartFrame();
void Invalidate(u32 addr, int size, GPUInvalidationType type);
void InvalidateAll(GPUInvalidationType type);
void ClearNextFrame();
void LoadClut();
// FramebufferManager keeps TextureCache updated about what regions of memory
// are being rendered to. This is barebones so far.
void NotifyFramebuffer(u32 address, VirtualFramebuffer *framebuffer);
void NotifyFramebufferDestroyed(u32 address, VirtualFramebuffer *framebuffer);
// Number of entries currently held in the primary cache map.
size_t NumLoadedTextures() const {
return cache.size();
}
// Only used by Qt UI?
bool DecodeTexture(u8 *output, GPUgstate state);
private:
// Wow this is starting to grow big. Soon need to start looking at resizing it.
// Must stay a POD.
struct TexCacheEntry {
// After marking STATUS_UNRELIABLE, if it stays the same this many frames we'll trust it again.
const static int FRAMES_REGAIN_TRUST = 1000;
// Bit flags stored in `status`: the low two bits (STATUS_MASK) hold the
// hashing state, bits 2-3 (STATUS_ALPHA_MASK) hold the alpha classification.
enum Status {
STATUS_HASHING = 0x00,
STATUS_RELIABLE = 0x01, // cache, don't hash
STATUS_UNRELIABLE = 0x02, // never cache
STATUS_MASK = 0x03,
STATUS_ALPHA_UNKNOWN = 0x04,
STATUS_ALPHA_FULL = 0x00, // Has no alpha channel, or always full alpha.
STATUS_ALPHA_SIMPLE = 0x08, // Like above, but also has 0 alpha (e.g. 5551.)
STATUS_ALPHA_MASK = 0x0c,
};
// Status, but int so we can zero initialize.
int status;
u32 addr;
u32 hash;
VirtualFramebuffer *framebuffer; // if null, not sourced from an FBO.
u32 sizeInRAM;
int lastFrame;
int numFrames;
int numInvalidated;
u32 framesUntilNextFullHash;
u8 format;
u16 dim;
u16 bufw;
// Direct3D texture object backing this entry (a GLuint in the OpenGL backend).
LPDIRECT3DTEXTURE9 texture; //GLuint
int invalidHint;
u32 fullhash;
u32 cluthash;
int maxLevel;
float lodBias;
// Cache the current filter settings so we can avoid setting it again.
// (OpenGL madness where filter settings are attached to each texture).
// NOTE(review): the remark above comes from the OpenGL backend; whether it
// still applies to the D3D9 sampler-state model should be confirmed.
u8 magFilt;
u8 minFilt;
bool sClamp;
bool tClamp;
bool Matches(u16 dim2, u8 format2, int maxLevel2);
};
void Decimate(); // Run this once per frame to get rid of old textures.
void *UnswizzleFromMem(u32 texaddr, u32 bufw, u32 bytesPerPixel, u32 level);
void *readIndexedTex(int level, u32 texaddr, int bytesPerIndex, u32 dstFmt);
void UpdateSamplingParams(TexCacheEntry &entry, bool force);
void LoadTextureLevel(TexCacheEntry &entry, int level, bool replaceImages);
void *DecodeTextureLevel(GETextureFormat format, GEPaletteFormat clutformat, int level, u32 &texByteAlign, u32 &dstFmt);
void CheckAlpha(TexCacheEntry &entry, u32 *pixelData, u32 dstFmt, int w, int h);
template <typename T>
const T *GetCurrentClut();
u32 GetCurrentClutHash();
void UpdateCurrentClut();
TexCacheEntry *GetEntryAt(u32 texaddr);
// NOTE(review): std::map is used here but <map> is not among the includes
// visible in this header; presumably it arrives transitively — confirm.
typedef std::map<u64, TexCacheEntry> TexCache;
TexCache cache;
TexCache secondCache;
bool clearCacheNextFrame_;
bool lowMemoryMode_;
TextureScaler scaler;
// Scratch buffers reused between texture decodes.
SimpleBuf<u32> tmpTexBuf32;
SimpleBuf<u16> tmpTexBuf16;
SimpleBuf<u32> tmpTexBufRearrange;
// State of the currently loaded CLUT (color lookup table).
u32 clutLastFormat_;
u32 *clutBufRaw_;
u32 *clutBufConverted_;
u32 *clutBuf_;
u32 clutHash_;
u32 clutTotalBytes_;
// True if the clut is just alpha values in the same order (RGBA4444-bit only.)
bool clutAlphaLinear_;
u16 clutAlphaLinearColor_;
LPDIRECT3DTEXTURE9 lastBoundTexture;
float maxAnisotropyLevel;
};

View File

@ -0,0 +1,676 @@
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include "TextureScaler.h"
#include "Core/Config.h"
#include "Common/Common.h"
#include "Common/Log.h"
#include "Common/MsgHandler.h"
#include "Common/CommonFuncs.h"
#include "Common/ThreadPools.h"
#include "Common/CPUDetect.h"
#include "ext/xbrz/xbrz.h"
#include <stdlib.h>
#include <math.h>
#if _M_SSE >= 0x402
#include <nmmintrin.h>
#endif
// Report the time and throughput for each larger scaling operation in the log
//#define SCALING_MEASURE_TIME
#ifdef SCALING_MEASURE_TIME
#include "native/base/timeutil.h"
#endif
/////////////////////////////////////// Helper Functions (mostly math for parallelization)
namespace {
//////////////////////////////////////////////////////////////////// Color space conversion
// Expands rows [l, u) of a 16-bit 4444 image into 32-bit 8888 pixels.
// Every row is handled independently, so disjoint row ranges may be
// processed in parallel.
void convert4444(u16* data, u32* out, int width, int l, int u) {
	for (int row = l; row < u; ++row) {
		const int rowStart = row * width;
		for (int col = 0; col < width; ++col) {
			const u32 packed = data[rowStart + col];
			// Multiplying a 4-bit value by 17 replicates the nibble (0xF -> 0xFF).
			const u32 r = ((packed >> 12) & 0xF) * 17;
			const u32 g = ((packed >>  8) & 0xF) * 17;
			const u32 b = ((packed >>  4) & 0xF) * 17;
			const u32 a = ((packed >>  0) & 0xF) * 17;
			out[rowStart + col] = (a << 24) | (b << 16) | (g << 8) | r;
		}
	}
}
// Expands rows [l, u) of a 16-bit 565 image into 32-bit 8888 pixels with
// alpha forced to 0xFF. Rows are independent and may be processed in parallel.
void convert565(u16* data, u32* out, int width, int l, int u) {
	for (int row = l; row < u; ++row) {
		const int rowStart = row * width;
		for (int col = 0; col < width; ++col) {
			const u32 packed = data[rowStart + col];
			const u32 r = Convert5To8((packed >> 11) & 0x1F);
			const u32 g = Convert6To8((packed >>  5) & 0x3F);
			const u32 b = Convert5To8((packed      ) & 0x1F);
			out[rowStart + col] = (0xFF << 24) | (b << 16) | (g << 8) | r;
		}
	}
}
// Expands rows [l, u) of a 16-bit 5551 image into 32-bit 8888 pixels. The
// single alpha bit (bit 0) becomes either 0 or 255. Rows are independent and
// may be processed in parallel.
void convert5551(u16* data, u32* out, int width, int l, int u) {
	for (int row = l; row < u; ++row) {
		const int rowStart = row * width;
		for (int col = 0; col < width; ++col) {
			const u32 packed = data[rowStart + col];
			const u32 r = Convert5To8((packed >> 11) & 0x1F);
			const u32 g = Convert5To8((packed >>  6) & 0x1F);
			const u32 b = Convert5To8((packed >>  1) & 0x1F);
			const u32 a = (packed & 0x1) * 255;
			out[rowStart + col] = (a << 24) | (b << 16) | (g << 8) | r;
		}
	}
}
//////////////////////////////////////////////////////////////////// Various image processing
// Channel extractors for packed 32-bit pixels: R is bits 0-7, G bits 8-15,
// B bits 16-23 and A bits 24-31. The argument is parenthesized so compound
// expressions such as R(a | b) expand as expected.
#define R(_col) (((_col)>> 0)&0xFF)
#define G(_col) (((_col)>> 8)&0xFF)
#define B(_col) (((_col)>>16)&0xFF)
#define A(_col) (((_col)>>24)&0xFF)
// Sum of the absolute per-channel differences between two packed pixels.
#define DISTANCE(_p1,_p2) ( abs(static_cast<int>(static_cast<int>(R(_p1))-R(_p2))) + abs(static_cast<int>(static_cast<int>(G(_p1))-G(_p2))) \
	+ abs(static_cast<int>(static_cast<int>(B(_p1))-B(_p2))) + abs(static_cast<int>(static_cast<int>(A(_p1))-A(_p2))) )
// Per-channel weighted blend of two packed pixels: _factors[0] weights _p0 and
// _factors[1] weights _p1, both on a 0..255 scale. The whole expansion is
// wrapped in parentheses so it can be used inside larger expressions.
// this is sadly much faster than an inline function with a loop, at least in VC10
#define MIX_PIXELS(_p0, _p1, _factors) \
	( ( (R(_p0)*(_factors)[0] + R(_p1)*(_factors)[1])/255 <<  0 ) | \
	  ( (G(_p0)*(_factors)[0] + G(_p1)*(_factors)[1])/255 <<  8 ) | \
	  ( (B(_p0)*(_factors)[0] + B(_p1)*(_factors)[1])/255 << 16 ) | \
	  ( (A(_p0)*(_factors)[0] + A(_p1)*(_factors)[1])/255 << 24 ) )
// Edge length (in pixels) of the square tiles the image kernels iterate over.
#define BLOCK_SIZE 32
// Applies a 3x3 integer kernel to rows [l, u) of a width x height image.
// Samples outside the image are clamped to the nearest edge pixel (Neumann
// boundary), and the absolute value of the accumulated sum is stored.
// The image is walked in BLOCK_SIZE x BLOCK_SIZE tiles. Disjoint row ranges
// can be processed in parallel. Not optimized (e.g. separable kernels are not
// special-cased).
void convolve3x3(u32* data, u32* out, const int kernel[3][3], int width, int height, int l, int u) {
	const int tileRows = (u-l)/BLOCK_SIZE + 1;
	const int tileCols = width/BLOCK_SIZE + 1;
	for (int tileY = 0; tileY < tileRows; ++tileY) {
		const int yBegin = l + tileY*BLOCK_SIZE;
		const int yEnd = std::min(yBegin + BLOCK_SIZE, u);
		for (int tileX = 0; tileX < tileCols; ++tileX) {
			const int xBegin = tileX*BLOCK_SIZE;
			const int xEnd = std::min(xBegin + BLOCK_SIZE, width);
			for (int y = yBegin; y < yEnd; ++y) {
				for (int x = xBegin; x < xEnd; ++x) {
					int acc = 0;
					for (int ky = 0; ky < 3; ++ky) {
						// clamp the sampled row into the image
						const int sy = std::max(std::min(y + ky - 1, height-1), 0);
						for (int kx = 0; kx < 3; ++kx) {
							// clamp the sampled column into the image
							const int sx = std::max(std::min(x + kx - 1, width-1), 0);
							acc += data[sy*width + sx] * kernel[ky][kx];
						}
					}
					out[y*width + x] = abs(acc);
				}
			}
		}
	}
}
// Horizontal deposterization pass over rows [l, u) of an image w pixels wide.
// Smooths the banding left by low-color-depth sources (e.g. 4444, 565,
// compressed): for each channel, when the left and right neighbours differ,
// one of them equals the center and the other is within T of it, the channel
// is replaced by the average of the two neighbours. The first and last column
// are copied unchanged.
void deposterizeH(u32* data, u32* out, int w, int l, int u) {
	static const int T = 8;
	for (int y = l; y < u; ++y) {
		for (int x = 0; x < w; ++x) {
			const int pos = y*w + x;
			const u32 center = data[pos];
			if (x == 0 || x == w-1) {
				out[pos] = center;
				continue;
			}
			const u32 left  = data[pos - 1];
			const u32 right = data[pos + 1];
			u32 merged = 0;
			for (int shift = 0; shift < 32; shift += 8) {
				const u8 lc = ((left  >> shift) & 0xFF);
				const u8 cc = ((center >> shift) & 0xFF);
				const u8 rc = ((right >> shift) & 0xFF);
				const bool matchesLeft  = (lc == cc) && abs((int)rc - cc) <= T;
				const bool matchesRight = (rc == cc) && abs((int)lc - cc) <= T;
				if ((lc != rc) && (matchesLeft || matchesRight)) {
					// small step next to a flat run: blend this component
					merged |= ((rc + lc) / 2) << shift;
				} else {
					// keep this component as it is
					merged |= cc << shift;
				}
			}
			out[pos] = merged;
		}
	}
}
// Vertical deposterization pass over rows [l, u) of a w x h image: the same
// per-channel rule as the horizontal pass, applied to the pixels above and
// below. Rows 0 and h-1 are copied unchanged. The image is walked in vertical
// strips BLOCK_SIZE pixels wide.
void deposterizeV(u32* data, u32* out, int w, int h, int l, int u) {
	static const int T = 8;
	const int stripCount = w/BLOCK_SIZE + 1;
	for (int strip = 0; strip < stripCount; ++strip) {
		const int xBegin = strip*BLOCK_SIZE;
		const int xEnd = std::min(xBegin + BLOCK_SIZE, w);
		for (int y = l; y < u; ++y) {
			for (int x = xBegin; x < xEnd; ++x) {
				const int pos = y*w + x;
				const u32 center = data[pos];
				if (y == 0 || y == h-1) {
					out[pos] = center;
					continue;
				}
				const u32 upper = data[pos - w];
				const u32 lower = data[pos + w];
				u32 merged = 0;
				for (int shift = 0; shift < 32; shift += 8) {
					const u8 uc = ((upper  >> shift) & 0xFF);
					const u8 cc = ((center >> shift) & 0xFF);
					const u8 lc = ((lower  >> shift) & 0xFF);
					const bool matchesUpper = (uc == cc) && abs((int)lc - cc) <= T;
					const bool matchesLower = (lc == cc) && abs((int)uc - cc) <= T;
					if ((uc != lc) && (matchesUpper || matchesLower)) {
						// small step next to a flat run: blend this component
						merged |= ((lc + uc) / 2) << shift;
					} else {
						// keep this component as it is
						merged |= cc << shift;
					}
				}
				out[pos] = merged;
			}
		}
	}
}
// Writes, for every pixel in rows [l, u), the summed color distance between
// that pixel and its eight neighbours. Larger values mean the pixel differs
// more from its surroundings. Neighbours outside the image add a fixed
// penalty instead (1200 for a whole missing row, 400 for a single missing
// pixel), which usually gives better results at the borders.
void generateDistanceMask(u32* data, u32* out, int width, int height, int l, int u) {
	const int tileRows = (u-l)/BLOCK_SIZE + 1;
	const int tileCols = width/BLOCK_SIZE + 1;
	for (int tileY = 0; tileY < tileRows; ++tileY) {
		const int yBegin = l + tileY*BLOCK_SIZE;
		const int yEnd = std::min(yBegin + BLOCK_SIZE, u);
		for (int tileX = 0; tileX < tileCols; ++tileX) {
			const int xBegin = tileX*BLOCK_SIZE;
			const int xEnd = std::min(xBegin + BLOCK_SIZE, width);
			for (int y = yBegin; y < yEnd; ++y) {
				for (int x = xBegin; x < xEnd; ++x) {
					u32 &total = out[y*width + x];
					total = 0;
					const u32 center = data[y*width + x];
					for (int dy = -1; dy <= 1; ++dy) {
						const int ny = y + dy;
						if (ny == -1 || ny == height) {
							total += 1200; // the whole neighbour row is outside the image
							continue;
						}
						for (int dx = -1; dx <= 1; ++dx) {
							if (dx == 0 && dy == 0)
								continue; // skip the pixel itself
							const int nx = x + dx;
							if (nx == -1 || nx == width) {
								total += 400; // this neighbour is outside the image
								continue;
							}
							total += DISTANCE(data[ny*width + nx], center);
						}
					}
				}
			}
		}
	}
}
// Blends `source` into `data` in place for rows [l, u). The per-pixel weight
// of `source` is min(mask, maskmax) / maskmax; `data` keeps the remainder.
// Where `source` is fully transparent the result's alpha is forced to 0.
void mix(u32* data, u32* source, u32* mask, u32 maskmax, int width, int l, int u) {
	for (int row = l; row < u; ++row) {
		const int rowStart = row*width;
		for (int col = 0; col < width; ++col) {
			const int pos = rowStart + col;
			const u8 sourceWeight = static_cast<u8>((std::min(mask[pos], maskmax)*255)/maskmax);
			const u8 mixFactors[2] = { static_cast<u8>(255 - sourceWeight), sourceWeight };
			data[pos] = MIX_PIXELS(data[pos], source[pos], mixFactors);
			// xBRZ always does a better job with hard alpha
			if (A(source[pos]) == 0)
				data[pos] = data[pos] & 0x00FFFFFF;
		}
	}
}
//////////////////////////////////////////////////////////////////// Bicubic scaling
// Evaluates the Mitchell-Netravali reconstruction filter at distance x, with
// parameters B and C. The filter is symmetric, so only |x| is used; the result
// is 0 for |x| >= 2.
// B=1 C=0 : cubic B spline (very smooth)
// B=C=1/3 : recommended for general upscaling
// B=0 C=1/2 : Catmull-Rom spline (sharp, ringing)
// see Mitchell & Netravali, "Reconstruction Filters in Computer Graphics"
inline float mitchell(float x, float B, float C) {
	const float ax = fabsf(x);
	if(ax>=2.0f) return 0.0f;
	// The polynomials are defined in terms of |x|; evaluating them with a
	// negative x would flip the sign of the odd-powered terms.
	const float ax2 = ax*ax;
	const float ax3 = ax2*ax;
	if(ax>=1.0f) return ((-B-6*C)*ax3 + (6*B+30*C)*ax2 + (-12*B-48*C)*ax + (8*B+24*C))/6.0f;
	return ((12-9*B-6*C)*ax3 + (-18+12*B+6*C)*ax2 + (6-2*B))/6.0f;
}
// arrays for pre-calculating weights and sums (~20KB)
// Dimensions:
// 0: 0 = BSpline, 1 = mitchell
// 1: scaling factor 2-5x (index = factor - 2)
// 2,3: 5x5 generated pixels
// 4,5: 5x5 pixels sampled from
float bicubicWeights[2][4][5][5][5][5];
// Reciprocal of the summed weights for each (type, factor, x, y) combination,
// used to normalize the weighted sample sum.
float bicubicInvSums[2][4][5][5];
// Fills bicubicWeights and bicubicInvSums for both spline types and every
// supported scaling factor (2 to 5). For each generated sub-pixel the weight
// of each of the 5x5 source pixels is the spline evaluated at the distance
// between their centers; the reciprocal of the weight sum is stored too.
void initBicubicWeights() {
	// index 0: cubic B-spline, index 1: Mitchell
	const float splineB[2] = { 1.0f, 0.334f };
	const float splineC[2] = { 0.0f, 0.334f };
	for (int type = 0; type < 2; ++type) {
		for (int factor = 2; factor <= 5; ++factor) {
			const int factorIdx = factor - 2;
			for (int x = 0; x < factor; ++x) {
				for (int y = 0; y < factor; ++y) {
					float total = 0.0f;
					for (int sx = -2; sx <= 2; ++sx) {
						for (int sy = -2; sy <= 2; ++sy) {
							// offset between the generated sub-pixel center and the source pixel center
							float dx = (x+0.5f)/factor - (sx+0.5f);
							float dy = (y+0.5f)/factor - (sy+0.5f);
							float dist = sqrt(dx*dx + dy*dy);
							float weight = mitchell(dist, splineB[type], splineC[type]);
							bicubicWeights[type][factorIdx][x][y][sx+2][sy+2] = weight;
							total += weight;
						}
					}
					bicubicInvSums[type][factorIdx][x][y] = 1.0f/total;
				}
			}
		}
	}
}
// Scalar bicubic upscaling of source rows [l, u) by the integer factor f,
// using the precomputed weight table for spline type T. Each output pixel is
// the normalized weighted sum of the 5x5 source pixels around it, with source
// coordinates clamped to the image. Output is walked in BLOCK_SIZE tiles.
template<int f, int T>
void scaleBicubicT(u32* data, u32* out, int w, int h, int l, int u) {
	const int outw = w*f;
	const int tileRows = (u-l)*f/BLOCK_SIZE + 1;
	const int tileCols = w*f/BLOCK_SIZE + 1;
	for (int tileY = 0; tileY < tileRows; ++tileY) {
		const int yBegin = l*f + tileY*BLOCK_SIZE;
		const int yEnd = std::min(yBegin + BLOCK_SIZE, u*f);
		for (int tileX = 0; tileX < tileCols; ++tileX) {
			const int xBegin = tileX*BLOCK_SIZE;
			const int xEnd = std::min(xBegin + BLOCK_SIZE, w*f);
			for (int y = yBegin; y < yEnd; ++y) {
				for (int x = xBegin; x < xEnd; ++x) {
					// source pixel covering this output pixel, and the position inside it
					const int cx = x/f, cy = y/f;
					const int subX = x%f, subY = y%f;
					float r = 0.0f, g = 0.0f, b = 0.0f, a = 0.0f;
					// accumulate the weighted 5x5 neighbourhood of the source pixel
					for (int sx = -2; sx <= 2; ++sx) {
						for (int sy = -2; sy <= 2; ++sy) {
							const float weight = bicubicWeights[T][f-2][subX][subY][sx+2][sy+2];
							if (weight == 0.0f)
								continue;
							// clamp pixel locations
							const int csy = std::max(std::min(sy+cy,h-1),0);
							const int csx = std::max(std::min(sx+cx,w-1),0);
							const u32 sample = data[csy*w+csx];
							r += weight*R(sample);
							g += weight*G(sample);
							b += weight*B(sample);
							a += weight*A(sample);
						}
					}
					// normalize, round up and clamp each channel to 0..255
					const float invSum = bicubicInvSums[T][f-2][subX][subY];
					const int ri = std::min(std::max(static_cast<int>(ceilf(r*invSum)),0),255);
					const int gi = std::min(std::max(static_cast<int>(ceilf(g*invSum)),0),255);
					const int bi = std::min(std::max(static_cast<int>(ceilf(b*invSum)),0),255);
					const int ai = std::min(std::max(static_cast<int>(ceilf(a*invSum)),0),255);
					out[y*outw + x] = (ai << 24) | (bi << 16) | (gi << 8) | ri;
				}
			}
		}
	}
}
#if _M_SSE >= 0x401
// SSE4.1 variant of scaleBicubicT: same tiling, same 5x5 weighted sampling and
// the same weight tables, but all four color channels of a pixel are
// accumulated together in one __m128.
// NOTE(review): the final conversion uses _mm_cvtps_epi32 (which rounds
// according to the current SSE rounding mode) whereas the scalar version uses
// ceilf, so the two paths may differ slightly in the low bit — confirm whether
// that is intended.
// NOTE(review): this block is enabled for _M_SSE >= 0x401, but <nmmintrin.h>
// is only included at the top of the file for _M_SSE >= 0x402 — confirm the
// intrinsics are declared when _M_SSE is exactly 0x401.
template<int f, int T>
void scaleBicubicTSSE41(u32* data, u32* out, int w, int h, int l, int u) {
int outw = w*f;
for(int yb = 0; yb < (u-l)*f/BLOCK_SIZE+1; ++yb) {
for(int xb = 0; xb < w*f/BLOCK_SIZE+1; ++xb) {
for(int y = l*f+yb*BLOCK_SIZE; y < l*f+(yb+1)*BLOCK_SIZE && y < u*f; ++y) {
for(int x = xb*BLOCK_SIZE; x < (xb+1)*BLOCK_SIZE && x < w*f; ++x) {
// running weighted sum, one float lane per color channel
__m128 result = _mm_set1_ps(0.0f);
int cx = x/f, cy = y/f;
// sample supporting pixels in original image
for(int sx = -2; sx <= 2; ++sx) {
for(int sy = -2; sy <= 2; ++sy) {
float weight = bicubicWeights[T][f-2][x%f][y%f][sx+2][sy+2];
if(weight != 0.0f) {
// clamp pixel locations
int csy = std::max(std::min(sy+cy,h-1),0);
int csx = std::max(std::min(sx+cx,w-1),0);
// sample & add weighted components
// load the packed pixel, widen its four bytes to 32-bit lanes, convert to float
__m128i sample = _mm_cvtsi32_si128(data[csy*w+csx]);
sample = _mm_cvtepu8_epi32(sample);
__m128 col = _mm_cvtepi32_ps(sample);
col = _mm_mul_ps(col, _mm_set1_ps(weight));
result = _mm_add_ps(result, col);
}
}
}
// generate and write result
// normalize, convert back to integers, then narrow 32 -> 16 -> 8 bits with
// saturation, which clamps every channel to 0..255
__m128i pixel = _mm_cvtps_epi32(_mm_mul_ps(result, _mm_set1_ps(bicubicInvSums[T][f-2][x%f][y%f])));
pixel = _mm_packs_epi32(pixel, pixel);
pixel = _mm_packus_epi16(pixel, pixel);
out[y*outw + x] = _mm_cvtsi128_si32(pixel);
}
}
}
}
}
#endif
// Bicubic B-spline upscaling of rows [l, u) by `factor` (2 to 5). Dispatches
// the runtime factor to the matching template instantiation, using the SSE4.1
// kernels when they are compiled in and the CPU supports them. Any other
// factor only logs an error.
void scaleBicubicBSpline(int factor, u32* data, u32* out, int w, int h, int l, int u) {
#if _M_SSE >= 0x401
	if (cpu_info.bSSE4_1) {
		switch (factor) {
		case 2:
			scaleBicubicTSSE41<2, 0>(data, out, w, h, l, u);
			break;
		case 3:
			scaleBicubicTSSE41<3, 0>(data, out, w, h, l, u);
			break;
		case 4:
			scaleBicubicTSSE41<4, 0>(data, out, w, h, l, u);
			break;
		case 5:
			scaleBicubicTSSE41<5, 0>(data, out, w, h, l, u);
			break;
		default:
			ERROR_LOG(G3D, "Bicubic upsampling only implemented for factors 2 to 5");
			break;
		}
		return;
	}
#endif
	// scalar fallback
	switch (factor) {
	case 2:
		scaleBicubicT<2, 0>(data, out, w, h, l, u);
		break;
	case 3:
		scaleBicubicT<3, 0>(data, out, w, h, l, u);
		break;
	case 4:
		scaleBicubicT<4, 0>(data, out, w, h, l, u);
		break;
	case 5:
		scaleBicubicT<5, 0>(data, out, w, h, l, u);
		break;
	default:
		ERROR_LOG(G3D, "Bicubic upsampling only implemented for factors 2 to 5");
		break;
	}
}
// Bicubic Mitchell upscaling of rows [l, u) by `factor` (2 to 5). Dispatches
// the runtime factor to the matching template instantiation, using the SSE4.1
// kernels when they are compiled in and the CPU supports them. Any other
// factor only logs an error.
void scaleBicubicMitchell(int factor, u32* data, u32* out, int w, int h, int l, int u) {
#if _M_SSE >= 0x401
	if (cpu_info.bSSE4_1) {
		switch (factor) {
		case 2:
			scaleBicubicTSSE41<2, 1>(data, out, w, h, l, u);
			break;
		case 3:
			scaleBicubicTSSE41<3, 1>(data, out, w, h, l, u);
			break;
		case 4:
			scaleBicubicTSSE41<4, 1>(data, out, w, h, l, u);
			break;
		case 5:
			scaleBicubicTSSE41<5, 1>(data, out, w, h, l, u);
			break;
		default:
			ERROR_LOG(G3D, "Bicubic upsampling only implemented for factors 2 to 5");
			break;
		}
		return;
	}
#endif
	// scalar fallback
	switch (factor) {
	case 2:
		scaleBicubicT<2, 1>(data, out, w, h, l, u);
		break;
	case 3:
		scaleBicubicT<3, 1>(data, out, w, h, l, u);
		break;
	case 4:
		scaleBicubicT<4, 1>(data, out, w, h, l, u);
		break;
	case 5:
		scaleBicubicT<5, 1>(data, out, w, h, l, u);
		break;
	default:
		ERROR_LOG(G3D, "Bicubic upsampling only implemented for factors 2 to 5");
		break;
	}
}
//////////////////////////////////////////////////////////////////// Bilinear scaling
// Blend weights for integral bilinear upscaling, indexed as
// [factor - 2][sub-pixel index counted from the nearer edge][0 or 1].
// Each pair is handed to MIX_PIXELS(neighbour, center, pair): entry 0 weights
// the neighbouring source pixel, entry 1 the center pixel, and the two sum to
// 255. Entries a factor never indexes are left as {0, 0}.
const static u8 BILINEAR_FACTORS[4][3][2] = {
{ { 44,211}, { 0, 0}, { 0, 0} }, // x2
{ { 64,191}, { 0,255}, { 0, 0} }, // x3
{ { 77,178}, { 26,229}, { 0, 0} }, // x4
{ {102,153}, { 51,204}, { 0,255} }, // x5
};
// Horizontal half of the integral bilinear upscale by factor f. Every source
// pixel in rows [l, u) becomes f output pixels on the same row: the first
// f/2 + f%2 blend the center with its left neighbour, the rest blend it with
// its right neighbour. At the row ends the missing neighbour is replaced by
// the center pixel itself.
template<int f>
void bilinearHt(u32* data, u32* out, int w, int l, int u) {
	static_assert(f>1 && f<=5, "Bilinear scaling only implemented for factors 2 to 5");
	const int outw = w*f;
	const int leftCount = f/2 + f%2;
	for (int y = l; y < u; ++y) {
		for (int x = 0; x < w; ++x) {
			const int inpos = y*w + x;
			const u32 center = data[inpos];
			const u32 left  = (x == 0)   ? center : data[inpos - 1];
			const u32 right = (x == w-1) ? center : data[inpos + 1];
			u32 *dst = out + y*outw + x*f;
			// hope the compiler unrolls this
			for (int i = 0; i < f; ++i) {
				if (i < leftCount) {
					// first half of the new pixels + center
					dst[i] = MIX_PIXELS(left, center, BILINEAR_FACTORS[f-2][i]);
				} else {
					// second half of the new pixels
					dst[i] = MIX_PIXELS(right, center, BILINEAR_FACTORS[f-2][f-1-i]);
				}
			}
		}
	}
}
void bilinearH(int factor, u32* data, u32* out, int w, int l, int u) {
switch(factor) {
case 2: bilinearHt<2>(data, out, w, l, u); break;
case 3: bilinearHt<3>(data, out, w, l, u); break;
case 4: bilinearHt<4>(data, out, w, l, u); break;
case 5: bilinearHt<5>(data, out, w, l, u); break;
default: ERROR_LOG(G3D, "Bilinear upsampling only implemented for factors 2 to 5");
}
}
// Vertical half of the integral bilinear upscale by factor f. `data` is the
// horizontally scaled image (w*f pixels per row). Every row in [l, u) becomes
// f output rows: the first f/2 + f%2 blend it with the row above, the rest
// with the row below.
// gl/gu == global lower and upper bound of the whole image; at those bounds
// the missing neighbour row is replaced by the row itself.
template<int f>
void bilinearVt(u32* data, u32* out, int w, int gl, int gu, int l, int u) {
	static_assert(f>1 && f<=5, "Bilinear scaling only implemented for 2x, 3x, 4x, and 5x");
	const int outw = w*f;
	const int upperCount = f/2 + f%2;
	const int stripCount = outw/BLOCK_SIZE + 1;
	for (int strip = 0; strip < stripCount; ++strip) {
		const int xBegin = strip*BLOCK_SIZE;
		for (int y = l; y < u; ++y) {
			const u32 uy = y - (y==gl   ? 0 : 1);
			const u32 ly = y + (y==gu-1 ? 0 : 1);
			for (int x = xBegin; x < xBegin + BLOCK_SIZE && x < outw; ++x) {
				const u32 upper  = data[uy * outw + x];
				const u32 center = data[y * outw + x];
				const u32 lower  = data[ly * outw + x];
				// hope the compiler unrolls this
				for (int i = 0; i < f; ++i) {
					if (i < upperCount) {
						// first half of the new pixels + center
						out[(y*f + i)*outw + x] = MIX_PIXELS(upper, center, BILINEAR_FACTORS[f-2][i]);
					} else {
						// second half of the new pixels
						out[(y*f + i)*outw + x] = MIX_PIXELS(lower, center, BILINEAR_FACTORS[f-2][f-1-i]);
					}
				}
			}
		}
	}
}
void bilinearV(int factor, u32* data, u32* out, int w, int gl, int gu, int l, int u) {
switch(factor) {
case 2: bilinearVt<2>(data, out, w, gl, gu, l, u); break;
case 3: bilinearVt<3>(data, out, w, gl, gu, l, u); break;
case 4: bilinearVt<4>(data, out, w, gl, gu, l, u); break;
case 5: bilinearVt<5>(data, out, w, gl, gu, l, u); break;
default: ERROR_LOG(G3D, "Bilinear upsampling only implemented for factors 2 to 5");
}
}
#undef BLOCK_SIZE
#undef MIX_PIXELS
#undef DISTANCE
#undef R
#undef G
#undef B
#undef A
// used for debugging texture scaling (writing textures to files)
static int g_imgCount = 0;
void dbgPPM(int w, int h, u8* pixels, const char* prefix = "dbg") { // 3 component RGB
char fn[32];
snprintf(fn, 32, "%s%04d.ppm", prefix, g_imgCount++);
FILE *fp = fopen(fn, "wb");
fprintf(fp, "P6\n%d %d\n255\n", w, h);
for(int j = 0; j < h; ++j) {
for(int i = 0; i < w; ++i) {
static unsigned char color[3];
color[0] = pixels[(j*w+i)*4+0]; /* red */
color[1] = pixels[(j*w+i)*4+1]; /* green */
color[2] = pixels[(j*w+i)*4+2]; /* blue */
fwrite(color, 1, 3, fp);
}
}
fclose(fp);
}
void dbgPGM(int w, int h, u32* pixels, const char* prefix = "dbg") { // 1 component
char fn[32];
snprintf(fn, 32, "%s%04d.pgm", prefix, g_imgCount++);
FILE *fp = fopen(fn, "wb");
fprintf(fp, "P5\n%d %d\n65536\n", w, h);
for(int j = 0; j < h; ++j) {
for(int i = 0; i < w; ++i) {
fwrite((pixels+(j*w+i)), 1, 2, fp);
}
}
fclose(fp);
}
}
/////////////////////////////////////// Texture Scaler
// Fills the file-level bicubic weight tables. They are recomputed each time a
// TextureScaler is constructed.
TextureScaler::TextureScaler() {
initBicubicWeights();
}
// Returns true when every 32-bit word of the texture equals the first one,
// i.e. the texture is a single flat color. Formats other than
// D3DFMT_A8R8G8B8 are treated as holding two pixels per 32-bit word.
bool TextureScaler::IsEmptyOrFlat(u32* data, int pixels, u32 fmt) {
	const int pixelsPerWord = (fmt == D3DFMT_A8R8G8B8) ? 1 : 2;
	const int wordCount = pixels / pixelsPerWord;
	const int ref = data[0];
	int i = 0;
	while (i < wordCount) {
		if (data[i] != ref)
			return false;
		++i;
	}
	return true;
}
// Upscales a texture by `factor` using the scaling type selected in g_Config.
// data / dstFmt / width / height describe the input on entry. On success
// `data` points at an internal buffer owned by this scaler holding the scaled
// image, `dstFmt` is D3DFMT_A8R8G8B8 and width/height are multiplied by
// `factor`. All four are left untouched when the texture is flat or when the
// configured scaling type is unknown.
void TextureScaler::Scale(u32* &data, u32 &dstFmt, int &width, int &height, int factor) {
	// prevent processing empty or flat textures (this happens a lot in some games)
	// doesn't hurt the standard case, will be very quick for textures with actual texture
	if(IsEmptyOrFlat(data, width*height, dstFmt)) {
		INFO_LOG(G3D, "TextureScaler: early exit -- empty/flat texture");
		return;
	}
#ifdef SCALING_MEASURE_TIME
	double t_start = real_time_now();
#endif
	bufInput.resize(width*height); // used to store the input image if it needs to be reformatted
	bufOutput.resize(width*height*factor*factor); // used to store the upscaled image
	u32 *inputBuf = bufInput.data();
	u32 *outputBuf = bufOutput.data();
	// convert texture to correct format for scaling
	// (inputBuf is redirected to `data` itself when no conversion is needed)
	ConvertTo8888(dstFmt, data, inputBuf, width, height);
	// deposterize
	if(g_Config.bTexDeposterize) {
		bufDeposter.resize(width*height);
		DePosterize(inputBuf, bufDeposter.data(), width, height);
		inputBuf = bufDeposter.data();
	}
	// scale
	switch(g_Config.iTexScalingType) {
	case XBRZ:
		ScaleXBRZ(factor, inputBuf, outputBuf, width, height);
		break;
	case HYBRID:
		ScaleHybrid(factor, inputBuf, outputBuf, width, height);
		break;
	case BICUBIC:
		ScaleBicubicMitchell(factor, inputBuf, outputBuf, width, height);
		break;
	case HYBRID_BICUBIC:
		ScaleHybrid(factor, inputBuf, outputBuf, width, height, true);
		break;
	default:
		ERROR_LOG(G3D, "Unknown scaling type: %d", g_Config.iTexScalingType);
		// Nothing was written to outputBuf, so keep the caller's texture
		// instead of handing back uninitialized memory.
		return;
	}
	// update values accordingly
	data = outputBuf;
	dstFmt = D3DFMT_A8R8G8B8;
	width *= factor;
	height *= factor;
#ifdef SCALING_MEASURE_TIME
	if(width*height > 64*64*factor*factor) {
		double t = real_time_now() - t_start;
		NOTICE_LOG(MASTER_LOG, "TextureScaler: processed %9d pixels in %6.5lf seconds. (%9.2lf Mpixels/second)",
			width*height, t, (width*height)/(t*1000*1000));
	}
#endif
}
// Upscales `source` into `dest` with xBRZ using a default-constructed
// configuration. The rows are split across the global thread pool.
void TextureScaler::ScaleXBRZ(int factor, u32* source, u32* dest, int width, int height) {
	xbrz::ScalerCfg cfg;
	auto rowRangeTask = std::bind(&xbrz::scale, factor, source, dest, width, height, cfg, placeholder::_1, placeholder::_2);
	GlobalThreadPool::Loop(rowRangeTask, 0, height);
}
void TextureScaler::ScaleBilinear(int factor, u32* source, u32* dest, int width, int height) {
bufTmp1.resize(width*height*factor);
u32 *tmpBuf = bufTmp1.data();
GlobalThreadPool::Loop(std::bind(&bilinearH, factor, source, tmpBuf, width, placeholder::_1, placeholder::_2), 0, height);
GlobalThreadPool::Loop(std::bind(&bilinearV, factor, tmpBuf, dest, width, 0, height, placeholder::_1, placeholder::_2), 0, height);
}
// Bicubic B-spline upscale of `source` into `dest`, with the rows split
// across the global thread pool.
void TextureScaler::ScaleBicubicBSpline(int factor, u32* source, u32* dest, int width, int height) {
	auto rowRangeTask = std::bind(&scaleBicubicBSpline, factor, source, dest, width, height, placeholder::_1, placeholder::_2);
	GlobalThreadPool::Loop(rowRangeTask, 0, height);
}
// Bicubic Mitchell upscale of `source` into `dest`, with the rows split
// across the global thread pool.
void TextureScaler::ScaleBicubicMitchell(int factor, u32* source, u32* dest, int width, int height) {
	auto rowRangeTask = std::bind(&scaleBicubicMitchell, factor, source, dest, width, height, placeholder::_1, placeholder::_2);
	GlobalThreadPool::Loop(rowRangeTask, 0, height);
}
// Hybrid upscale: blends a smooth upscale (bilinear, or bicubic B-spline when
// `bicubic` is true) with an xBRZ upscale, using a per-pixel feature mask.
// The statements below depend on their order because bufTmp1..3 are reused
// (ScaleBilinear resizes bufTmp1 internally).
void TextureScaler::ScaleHybrid(int factor, u32* source, u32* dest, int width, int height, bool bicubic) {
// Basic algorithm:
// 1) determine a feature mask C based on a sobel-ish filter + splatting, and upscale that mask bilinearly
// 2) generate 2 scaled images: A - using Bilinear (or bicubic B-spline) filtering, B - using xBRZ
// 3) output = A*(1-C) + B*C, with C = min(mask, 8192) / 8192 (see mix())
// All-ones kernel: sums each pixel's 3x3 neighbourhood, spreading the mask.
const static int KERNEL_SPLAT[3][3] = {
{ 1, 1, 1 }, { 1, 1, 1 }, { 1, 1, 1 }
};
bufTmp1.resize(width*height);
bufTmp2.resize(width*height*factor*factor);
bufTmp3.resize(width*height*factor*factor);
// distance mask -> bufTmp1, splatted mask -> bufTmp2 (both at source resolution)
GlobalThreadPool::Loop(std::bind(&generateDistanceMask, source, bufTmp1.data(), width, height, placeholder::_1, placeholder::_2), 0, height);
GlobalThreadPool::Loop(std::bind(&convolve3x3, bufTmp1.data(), bufTmp2.data(), KERNEL_SPLAT, width, height, placeholder::_1, placeholder::_2), 0, height);
ScaleBilinear(factor, bufTmp2.data(), bufTmp3.data(), width, height);
// mask C is now in bufTmp3
ScaleXBRZ(factor, source, bufTmp2.data(), width, height);
// xBRZ upscaled source is in bufTmp2
if(bicubic) ScaleBicubicBSpline(factor, source, dest, width, height);
else ScaleBilinear(factor, source, dest, width, height);
// Upscaled source is in dest
// Now we can mix it all together
// The factor 8192 was found through practical testing on a variety of textures
GlobalThreadPool::Loop(std::bind(&mix, dest, bufTmp2.data(), bufTmp3.data(), 8192, width*factor, placeholder::_1, placeholder::_2), 0, height*factor);
}
// Runs two rounds of horizontal + vertical deposterization. Each round reads
// its input (first `source`, then the previous result in `dest`), writes the
// horizontal pass into bufTmp3 and the vertical pass into `dest`.
void TextureScaler::DePosterize(u32* source, u32* dest, int width, int height) {
	bufTmp3.resize(width*height);
	u32 *roundInput = source;
	for (int round = 0; round < 2; ++round) {
		GlobalThreadPool::Loop(std::bind(&deposterizeH, roundInput, bufTmp3.data(), width, placeholder::_1, placeholder::_2), 0, height);
		GlobalThreadPool::Loop(std::bind(&deposterizeV, bufTmp3.data(), dest, width, height, placeholder::_1, placeholder::_2), 0, height);
		// the second round refines the output of the first
		roundInput = dest;
	}
}
// Makes `dest` refer to a 32-bit 8888 version of the texture. 16-bit formats
// are converted into the buffer `dest` points at on entry, with rows split
// across the global thread pool. For D3DFMT_A8R8G8B8, and for unsupported
// formats (after logging an error), `dest` is simply redirected to `source`.
void TextureScaler::ConvertTo8888(u32 format, u32* source, u32* &dest, int width, int height) {
	void (*rowConverter)(u16*, u32*, int, int, int) = 0;
	switch (format) {
	case D3DFMT_A8R8G8B8:
		dest = source; // already fine
		return;
	case D3DFMT_A4R4G4B4:
		rowConverter = &convert4444;
		break;
	case D3DFMT_R5G6B5:
		rowConverter = &convert565;
		break;
	case D3DFMT_A1R5G5B5:
		rowConverter = &convert5551;
		break;
	default:
		dest = source;
		ERROR_LOG(G3D, "iXBRZTexScaling: unsupported texture format");
		return;
	}
	GlobalThreadPool::Loop(std::bind(rowConverter, (u16*)source, dest, width, placeholder::_1, placeholder::_2), 0, height);
}

View File

@ -0,0 +1,52 @@
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#pragma once
#include "Common/MemoryUtil.h"
#include "../Globals.h"
#include "helper/global.h"
//#include "gfx/gl_common.h"
#include <vector>
// CPU texture upscaler: converts a texture to 32-bit 8888, optionally
// deposterizes it, and scales it by an integer factor with one of the
// algorithms listed in the enum below. Work buffers are kept as members and
// reused between calls.
class TextureScaler {
public:
TextureScaler();
// Scales the texture described by data/dstfmt/width/height by `factor`.
// The four reference parameters are updated to describe the scaled image,
// which lives in a buffer owned by this object. Flat (single-color) textures
// are returned untouched.
void Scale(u32* &data, u32 &dstfmt, int &width, int &height, int factor);
// Scaling algorithms; the value of g_Config.iTexScalingType is matched
// against these in Scale().
enum { XBRZ= 0, HYBRID = 1, BICUBIC = 2, HYBRID_BICUBIC = 3 };
private:
void ScaleXBRZ(int factor, u32* source, u32* dest, int width, int height);
void ScaleBilinear(int factor, u32* source, u32* dest, int width, int height);
void ScaleBicubicBSpline(int factor, u32* source, u32* dest, int width, int height);
void ScaleBicubicMitchell(int factor, u32* source, u32* dest, int width, int height);
// Blends an xBRZ upscale with a bilinear (or, if `bicubic`, bicubic B-spline) one.
void ScaleHybrid(int factor, u32* source, u32* dest, int width, int height, bool bicubic = false);
// May redirect `dest` to `source` when no conversion is needed.
void ConvertTo8888(u32 format, u32* source, u32* &dest, int width, int height);
void DePosterize(u32* source, u32* dest, int width, int height);
// True when all 32-bit words of the texture are identical.
bool IsEmptyOrFlat(u32* data, int pixels, u32 fmt);
// depending on the factor and texture sizes, these can get pretty large
// maximum is (100 MB total for a 512 by 512 texture with scaling factor 5 and hybrid scaling)
// of course, scaling factor 5 is totally silly anyway
SimpleBuf<u32> bufInput, bufDeposter, bufOutput, bufTmp1, bufTmp2, bufTmp3;
};

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,221 @@
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#pragma once
#include <map>
#include "IndexGenerator.h"
#include "VertexDecoder.h"
class LinkedShader;
class ShaderManager;
class TextureCache;
class FramebufferManager;
struct DecVtxFormat;
// State transitions:
// On creation: DRAWN_NEW
// DRAWN_NEW -> DRAWN_HASHING
// DRAWN_HASHING -> DRAWN_RELIABLE
// DRAWN_HASHING -> DRAWN_UNRELIABLE
// DRAWN_ONCE -> UNRELIABLE
// DRAWN_RELIABLE -> DRAWN_SAFE
// UNRELIABLE -> death
// DRAWN_ONCE -> death
// DRAWN_RELIABLE -> death
// Don't bother storing information about draws smaller than this.
// NOTE(review): the unit is presumably a vertex count - confirm against the user in the .cpp.
enum {
VERTEX_CACHE_THRESHOLD = 20,
};
// Try to keep this POD.
class VertexArrayInfo {
public:
VertexArrayInfo() {
status = VAI_NEW;
vbo = 0;
ebo = 0;
numDCs = 0;
prim = -1;
numDraws = 0;
numFrames = 0;
lastFrame = gpuStats.numFrames;
numVerts = 0;
drawsUntilNextFullHash = 0;
}
~VertexArrayInfo();
enum Status {
VAI_NEW,
VAI_HASHING,
VAI_RELIABLE, // cache, don't hash
VAI_UNRELIABLE, // never cache
};
u32 hash;
Status status;
LPDIRECT3DVERTEXBUFFER9 vbo;
LPDIRECT3DINDEXBUFFER9 ebo;
// Precalculated parameter for drawdrawElements
u16 numVerts;
s8 prim;
// ID information
u8 numDCs;
int numDraws;
int numFrames;
int lastFrame; // So that we can forget.
u16 drawsUntilNextFullHash;
};
// Handles transform, lighting and drawing.
// Draw calls are collected through SubmitPrim() and processed on Flush();
// see the DeferredDrawCall comment below.
class TransformDrawEngine {
public:
TransformDrawEngine();
virtual ~TransformDrawEngine();
// Queues one primitive. bytesRead is an out-parameter (semantics defined in the .cpp).
void SubmitPrim(void *verts, void *inds, int prim, int vertexCount, u32 vertexType, int forceIndexType, int *bytesRead);
void DrawBezier(int ucount, int vcount);
void DrawSpline(int ucount, int vcount, int utype, int vtype);
void DecodeVerts();
void Flush();
// Non-owning collaborators; these setters just store the pointer.
void SetShaderManager(ShaderManager *shaderManager) {
shaderManager_ = shaderManager;
}
void SetTextureCache(TextureCache *textureCache) {
textureCache_ = textureCache;
}
void SetFramebufferManager(FramebufferManager *fbManager) {
framebufferManager_ = fbManager;
}
void InitDeviceObjects();
void DestroyDeviceObjects();
// Intentionally empty in this backend.
void GLLost() {};
void DecimateTrackedVertexArrays();
void ClearTrackedVertexArrays();
void SetupVertexDecoder(u32 vertType);
// This requires a SetupVertexDecoder call first.
int EstimatePerVertexCost();
private:
void SoftwareTransformAndDraw(int prim, u8 *decoded, LinkedShader *program, int vertexCount, u32 vertexType, void *inds, int indexType, const DecVtxFormat &decVtxFormat, int maxIndex);
void ApplyDrawState(int prim);
bool IsReallyAClear(int numVerts) const;
// drawcall ID
u32 ComputeFastDCID();
u32 ComputeHash(); // Reads deferred vertex data.
VertexDecoder *GetVertexDecoder(u32 vtype);
// Defer all vertex decoding to a Flush, so that we can hash and cache the
// generated buffers without having to redecode them every time.
struct DeferredDrawCall {
void *verts;
void *inds;
u32 vertType;
u8 indexType;
u8 prim;
u16 vertexCount;
u16 indexLowerBound;
u16 indexUpperBound;
};
// Vertex collector state
IndexGenerator indexGen;
int collectedVerts;
int prevPrim_;
// Cached vertex decoders
std::map<u32, VertexDecoder *> decoderMap_;
VertexDecoder *dec_;
u32 lastVType_;
// Vertex collector buffers
u8 *decoded;
u16 *decIndex;
TransformedVertex *transformed;
TransformedVertex *transformedExpanded;
// Tracked vertex arrays (see VertexArrayInfo), keyed by a u32 id.
std::map<u32, VertexArrayInfo *> vai_;
// Vertex buffer objects
// Element buffer objects
enum { NUM_VBOS = 2 };
LPDIRECT3DVERTEXBUFFER9 vbo_[NUM_VBOS];
LPDIRECT3DINDEXBUFFER9 ebo_[NUM_VBOS];
int curVbo_;
// Other
ShaderManager *shaderManager_;
TextureCache *textureCache_;
FramebufferManager *framebufferManager_;
// Fixed-capacity queue of deferred draw calls; numDrawCalls is the fill level.
enum { MAX_DEFERRED_DRAW_CALLS = 128 };
DeferredDrawCall drawCalls[MAX_DEFERRED_DRAW_CALLS];
int numDrawCalls;
UVScale *uvScale;
};
// Only used by SW transform
// Four-component float colour. The members are declared in a, r, g, b order
// and operator[] indexes them in that same order (0 = a ... 3 = b).
struct Color4 {
	float a, r, g, b;

	// Mem-initializers are listed in declaration order (a, r, g, b); the
	// previous r, g, b, a listing did not match the order they actually run in.
	Color4() : a(0), r(0), g(0), b(0) { }
	Color4(float _r, float _g, float _b, float _a=1.0f)
		: a(_a), r(_r), g(_g), b(_b) {
	}
	// Builds from an array laid out as {a, r, g, b}.
	Color4(const float in[4]) {a=in[0];r=in[1];g=in[2];b=in[3];}
	// Builds from an {r, g, b} array plus a separate alpha.
	Color4(const float in[3], float alpha) {r=in[0];g=in[1];b=in[2];a=alpha;}

	// Component access by index: 0 = a, 1 = r, 2 = g, 3 = b.
	// Uses a switch rather than pointer arithmetic across separate members,
	// which the language does not define.
	const float &operator [](int i) const {
		switch (i) {
		case 0: return a;
		case 1: return r;
		case 2: return g;
		default: return b;
		}
	}

	// Scales every component (alpha included) by f.
	Color4 operator *(float f) const {
		return Color4(f*r,f*g,f*b,f*a);
	}
	// Component-wise product.
	Color4 operator *(const Color4 &c) const {
		return Color4(r*c.r,g*c.g,b*c.b,a*c.a);
	}
	// Component-wise sum.
	Color4 operator +(const Color4 &c) const {
		return Color4(r+c.r,g+c.g,b+c.b,a+c.a);
	}
	void operator +=(const Color4 &c) {
		r+=c.r;
		g+=c.g;
		b+=c.b;
		a+=c.a;
	}
	// Loads r, g, b from the low three bytes of col (r in bits 0-7); alpha is left untouched.
	void GetFromRGB(u32 col) {
		b = ((col>>16) & 0xff)/255.0f;
		g = ((col>>8) & 0xff)/255.0f;
		r = ((col>>0) & 0xff)/255.0f;
	}
	// Loads alpha from the low byte of col; r, g, b are left untouched.
	void GetFromA(u32 col) {
		a = (col&0xff)/255.0f;
	}
};

View File

@ -0,0 +1,886 @@
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include "math/lin/matrix4x4.h"
#include "Core/Config.h"
#include "Core/MemMap.h"
#include "GPU/ge_constants.h"
#include "VertexDecoder.h"
#include "VertexShaderGenerator.h"
// Debug helper: prints every component present in the decoded vertex that
// `vtx` currently points at (normal, UV, colours, then position) to stdout.
void PrintDecodedVertex(VertexReader &vtx) {
	if (vtx.hasNormal()) {
		float n[3];
		vtx.ReadNrm(n);
		printf("N: %f %f %f\n", n[0], n[1], n[2]);
	}

	if (vtx.hasUV()) {
		float texcoord[2];
		vtx.ReadUV(texcoord);
		printf("TC: %f %f\n", texcoord[0], texcoord[1]);
	}

	if (vtx.hasColor0()) {
		float primary[4];
		vtx.ReadColor0(primary);
		printf("C0: %f %f %f %f\n", primary[0], primary[1], primary[2], primary[3]);
	}

	if (vtx.hasColor1()) {
		float secondary[3];
		vtx.ReadColor1(secondary);
		printf("C1: %f %f %f\n", secondary[0], secondary[1], secondary[2]);
	}

	// The position is always printed, last.
	float p[3];
	vtx.ReadPos(p);
	printf("P: %f %f %f\n", p[0], p[1], p[2]);
}
// Byte size and alignment of each component in the source vertex data,
// indexed by the component's type field as extracted in SetVertexType
// (index 0 = component absent). Colour uses a 3-bit field, hence 8 entries.
const u8 tcsize[4] = {0,2,4,8}, tcalign[4] = {0,1,2,4};
const u8 colsize[8] = {0,0,0,0,2,2,2,4}, colalign[8] = {0,0,0,0,2,2,2,4};
const u8 nrmsize[4] = {0,3,6,12}, nrmalign[4] = {0,1,2,4};
const u8 possize[4] = {0,3,6,12}, posalign[4] = {0,1,2,4};
const u8 wtsize[4] = {0,1,2,4}, wtalign[4] = {0,1,2,4};
// Rounds n up to the next multiple of `align` using a bit mask
// (exact for power-of-two alignments; an alignment of 0 yields 0).
inline int align(int n, int align) {
	const int mask = align - 1;
	return (n + mask) & ~mask;
}
// Returns the number of bytes one component of decoded-vertex format `fmt`
// occupies. Sub-word formats are padded to 4 or 8 bytes; unknown formats
// (and DEC_NONE) report 0.
int DecFmtSize(u8 fmt) {
	switch (fmt) {
	case DEC_FLOAT_4:
		return 16;

	case DEC_FLOAT_3:
		return 12;

	case DEC_FLOAT_2:
	case DEC_S16_3:
	case DEC_U16_3:
	case DEC_U16_4:
		return 8;

	case DEC_FLOAT_1:
	case DEC_S8_3:
	case DEC_U8_1:
	case DEC_U8_2:
	case DEC_U8_3:
	case DEC_U8_4:
	case DEC_U16_1:
	case DEC_U16_2:
	case DEC_U8A_2:
	case DEC_U16A_2:
		return 4;

	case DEC_NONE:
	default:
		return 0;
	}
}
#if 0
// NOTE: this whole function is compiled out by the enclosing #if 0.
// It derives the vertex format produced by the software transform from an
// input decoded format.
// This is what the software transform spits out, and thus w
DecVtxFormat GetTransformedVtxFormat(const DecVtxFormat &fmt) {
DecVtxFormat tfm = {0};
int size = 0;
int offset = 0;
// Weights disappear during transform.
if (fmt.uvfmt) {
// UV always becomes float2.
tfm.uvfmt = DEC_FLOAT_2;
tfm.uvoff = offset;
offset += DecFmtSize(tfm.uvfmt);
}
// We always (?) get two colors out, they're floats (although we'd probably be fine with less precision).
tfm.c0fmt = DEC_FLOAT_4;
tfm.c0off = offset;
offset += DecFmtSize(tfm.c0fmt);
tfm.c1fmt = DEC_FLOAT_3; // color1 (specular) doesn't have alpha.
tfm.c1off = offset;
offset += DecFmtSize(tfm.c1fmt);
// We never get a normal, it's gone.
// But we do get a position, and it's always float3.
tfm.posfmt = DEC_FLOAT_3;
tfm.posoff = offset;
offset += DecFmtSize(tfm.posfmt);
// Update stride.
tfm.stride = offset;
return tfm;
}
#endif
void VertexDecoder::Step_WeightsU8() const
{
u8 *wt = (u8 *)(decoded_ + decFmt.w0off);
const u8 *wdata = (const u8*)(ptr_);
int j;
for (j = 0; j < nweights; j++)
wt[j] = wdata[j];
while (j & 3) // Zero additional weights rounding up to 4.
wt[j++] = 0;
}
void VertexDecoder::Step_WeightsU16() const
{
u16 *wt = (u16 *)(decoded_ + decFmt.w0off);
const u16_le *wdata = (const u16_le*)(ptr_);
int j;
for (j = 0; j < nweights; j++)
wt[j] =wdata[j];
while (j & 3) // Zero additional weights rounding up to 4.
wt[j++] = 0;
}
// Float weights should be uncommon, we can live with having to multiply these by 2.0
// to avoid special checks in the vertex shader generator.
// (PSP uses 0.0-2.0 fixed point numbers for weights)
void VertexDecoder::Step_WeightsFloat() const
{
float *wt = (float *)(decoded_ + decFmt.w0off);
const float_le *wdata = (const float_le*)(ptr_);
int j;
for (j = 0; j < nweights; j++) {
wt[j] = wdata[j];
}
while (j & 3) // Zero additional weights rounding up to 4.
wt[j++] = 0.0f;
}
void VertexDecoder::Step_TcU8() const
{
u8 *uv = (u8 *)(decoded_ + decFmt.uvoff);
const u8 *uvdata = (const u8*)(ptr_ + tcoff);
uv[0] = uvdata[0];
uv[1] = uvdata[1];
}
void VertexDecoder::Step_TcU16() const
{
u16 *uv = (u16 *)(decoded_ + decFmt.uvoff);
const u16_le *uvdata = (const u16_le*)(ptr_ + tcoff);
uv[0] = uvdata[0];
uv[1] = uvdata[1];
}
// Writes the two 16-bit texture coordinates multiplied by two (used via
// tcstep_Remaster for HD Remaster titles that double their u16 coordinates).
void VertexDecoder::Step_TcU16Double() const
{
	u16 *uv = (u16 *)(decoded_ + decFmt.uvoff);
	const u16_le *uvdata = (const u16_le*)(ptr_ + tcoff);
	// The former "*uv = *uvdata;" was a dead store: uv[0] is assigned right here.
	uv[0] = uvdata[0] * 2;
	uv[1] = uvdata[1] * 2;
}
void VertexDecoder::Step_TcU16Through() const
{
u16 *uv = (u16 *)(decoded_ + decFmt.uvoff);
const u16_le *uvdata = (const u16_le*)(ptr_ + tcoff);
uv[0] = uvdata[0];
uv[1] = uvdata[1];
}
void VertexDecoder::Step_TcU16ThroughDouble() const
{
u16 *uv = (u16 *)(decoded_ + decFmt.uvoff);
const u16_le *uvdata = (const u16_le*)(ptr_ + tcoff);
uv[0] = uvdata[0] * 2;
uv[1] = uvdata[1] * 2;
}
void VertexDecoder::Step_TcFloat() const
{
float *uv = (float *)(decoded_ + decFmt.uvoff);
const float_le *uvdata = (const float_le*)(ptr_ + tcoff);
uv[0] = uvdata[0];
uv[1] = uvdata[1];
}
void VertexDecoder::Step_TcFloatThrough() const
{
float *uv = (float *)(decoded_ + decFmt.uvoff);
const float_le *uvdata = (const float_le*)(ptr_ + tcoff);
uv[0] = uvdata[0];
uv[1] = uvdata[1];
}
void VertexDecoder::Step_TcU8Prescale() const {
float *uv = (float *)(decoded_ + decFmt.uvoff);
const u8 *uvdata = (const u8 *)(ptr_ + tcoff);
uv[0] = (float)uvdata[0] * (1.f / 128.f) * gstate_c.uv.uScale + gstate_c.uv.uOff;
uv[1] = (float)uvdata[1] * (1.f / 128.f) * gstate_c.uv.vScale + gstate_c.uv.vOff;
}
void VertexDecoder::Step_TcU16Prescale() const {
float *uv = (float *)(decoded_ + decFmt.uvoff);
const u16_le *uvdata = (const u16_le *)(ptr_ + tcoff);
uv[0] = (float)uvdata[0] * (1.f / 32768.f) * gstate_c.uv.uScale + gstate_c.uv.uOff;
uv[1] = (float)uvdata[1] * (1.f / 32768.f) * gstate_c.uv.vScale + gstate_c.uv.vOff;
}
void VertexDecoder::Step_TcFloatPrescale() const {
float *uv = (float *)(decoded_ + decFmt.uvoff);
const float_le *uvdata = (const float_le*)(ptr_ + tcoff);
uv[0] = uvdata[0] * gstate_c.uv.uScale + gstate_c.uv.uOff;
uv[1] = uvdata[1] * gstate_c.uv.vScale + gstate_c.uv.vOff;
}
void VertexDecoder::Step_Color565() const
{
u8 *c = decoded_ + decFmt.c0off;
u16 cdata = (u16)(*(u16_le*)(ptr_ + coloff));
c[0] = 255;
c[1] = Convert5To8(cdata & 0x1f);
c[2] = Convert6To8((cdata>>5) & 0x3f);
c[3] = Convert5To8((cdata>>11) & 0x1f);
}
// Expands a packed 5:5:5:1 colour to four bytes in A,R,G,B order.
//
// This step used to write R,G,B,A, unlike every other colour step in this
// file (Step_Color565/4444/8888 and Step_Color5551Morph all emit A,R,G,B,
// per the "Directx want ARGB" note), so 5551 colours came out with their
// channels rotated. The output order is now consistent with the others.
void VertexDecoder::Step_Color5551() const
{
	u8 *c = decoded_ + decFmt.c0off;
	u16 cdata = (u16)(*(u16_le*)(ptr_ + coloff));
	// Dx want ARGB
	c[0] = (cdata >> 15) ? 255 : 0;          // A: single bit expanded to 0 / 255
	c[1] = Convert5To8(cdata & 0x1f);        // R
	c[2] = Convert5To8((cdata>>5) & 0x1f);   // G
	c[3] = Convert5To8((cdata>>10) & 0x1f);  // B
}
void VertexDecoder::Step_Color4444() const
{
u8 *c = decoded_ + decFmt.c0off;
u16 cdata = (u16)(*(u16_le*)(ptr_ + coloff));
c[0] = Convert4To8((cdata >> (12)) & 0xF);
c[1] = Convert4To8((cdata >> (0)) & 0xF);
c[2] = Convert4To8((cdata >> (4)) & 0xF);
c[3] = Convert4To8((cdata >> (8)) & 0xF);
}
void VertexDecoder::Step_Color8888() const
{
// Directx want ARGB
u8 *c = (u8*)(decoded_ + decFmt.c0off);
const u8 *cdata = (const u8*)(ptr_ + coloff);
c[0] = cdata[3];
c[1] = cdata[0];
c[2] = cdata[1];
c[3] = cdata[2];
}
void VertexDecoder::Step_Color565Morph() const
{
float col[3] = {0};
for (int n = 0; n < morphcount; n++)
{
float w = gstate_c.morphWeights[n];
u16 cdata = (u16)(*(u16_le*)(ptr_ + onesize_*n + coloff));
col[0] += w * (cdata & 0x1f) * (255.0f / 31.0f);
col[1] += w * ((cdata>>5) & 0x3f) * (255.0f / 63.0f);
col[2] += w * ((cdata>>11) & 0x1f) * (255.0f / 31.0f);
}
u8 *c = decoded_ + decFmt.c0off;
// Dx want ARGB
c[0] = 255;
c[1] = (u8)col[0];
c[2] = (u8)col[1];
c[3] = (u8)col[2];
}
void VertexDecoder::Step_Color5551Morph() const
{
float col[4] = {0};
for (int n = 0; n < morphcount; n++)
{
float w = gstate_c.morphWeights[n];
u16 cdata = (u16)(*(u16_le*)(ptr_ + onesize_*n + coloff));
col[0] += w * (cdata & 0x1f) * (255.0f / 31.0f);
col[1] += w * ((cdata>>5) & 0x1f) * (255.0f / 31.0f);
col[2] += w * ((cdata>>10) & 0x1f) * (255.0f / 31.0f);
col[3] += w * ((cdata>>15) ? 255.0f : 0.0f);
}
u8 *c = decoded_ + decFmt.c0off;
// Dx want ARGB
c[0] = (u8)col[3];
c[1] = (u8)col[0];
c[2] = (u8)col[1];
c[3] = (u8)col[2];
}
void VertexDecoder::Step_Color4444Morph() const
{
float col[4] = {0};
for (int n = 0; n < morphcount; n++)
{
float w = gstate_c.morphWeights[n];
u16 cdata = (u16)(*(u16_le*)(ptr_ + onesize_*n + coloff));
for (int j = 0; j < 4; j++)
col[j] += w * ((cdata >> (j * 4)) & 0xF) * (255.0f / 15.0f);
}
u8 *c = decoded_ + decFmt.c0off;
// Dx want ARGB
c[0] = (u8)col[3];
c[1] = (u8)col[0];
c[2] = (u8)col[1];
c[3] = (u8)col[2];
}
void VertexDecoder::Step_Color8888Morph() const
{
float col[4] = {0};
for (int n = 0; n < morphcount; n++)
{
float w = gstate_c.morphWeights[n];
const u8 *cdata = (const u8*)(ptr_ + onesize_*n + coloff);
for (int j = 0; j < 4; j++)
col[j] += w * cdata[j];
}
u8 *c = decoded_ + decFmt.c0off;
// Dx want ARGB
c[0] = (u8)col[3];
c[1] = (u8)col[0];
c[2] = (u8)col[1];
c[3] = (u8)col[2];
}
void VertexDecoder::Step_NormalS8() const
{
s8 *normal = (s8 *)(decoded_ + decFmt.nrmoff);
u8 xorval = 0;
if (gstate.reversenormals & 1)
xorval = 0xFF; // Using xor instead of - to handle -128
const s8 *sv = (const s8*)(ptr_ + nrmoff);
for (int j = 0; j < 3; j++)
normal[j] = sv[j] ^ xorval;
normal[3] = 0;
}
void VertexDecoder::Step_NormalS16() const
{
s16 *normal = (s16 *)(decoded_ + decFmt.nrmoff);
u16 xorval = 0;
if (gstate.reversenormals & 1)
xorval = 0xFFFF;
const s16_le *sv = (const s16_le*)(ptr_ + nrmoff);
for (int j = 0; j < 3; j++)
normal[j] = sv[j] ^ xorval;
normal[3] = 0;
}
void VertexDecoder::Step_NormalFloat() const
{
float *normal = (float *)(decoded_ + decFmt.nrmoff);
float multiplier = 1.0f;
if (gstate.reversenormals & 1)
multiplier = -multiplier;
const float_le *fv = (const float_le*)(ptr_ + nrmoff);
for (int j = 0; j < 3; j++)
normal[j] = fv[j] * multiplier;
}
void VertexDecoder::Step_NormalS8Morph() const
{
float *normal = (float *)(decoded_ + decFmt.nrmoff);
memset(normal, 0, sizeof(float)*3);
for (int n = 0; n < morphcount; n++)
{
float multiplier = gstate_c.morphWeights[n];
if (gstate.reversenormals & 1) {
multiplier = -multiplier;
}
const s8 *bv = (const s8*)(ptr_ + onesize_*n + nrmoff);
multiplier *= (1.0f/127.0f);
for (int j = 0; j < 3; j++)
normal[j] += bv[j] * multiplier;
}
}
void VertexDecoder::Step_NormalS16Morph() const
{
float *normal = (float *)(decoded_ + decFmt.nrmoff);
memset(normal, 0, sizeof(float)*3);
for (int n = 0; n < morphcount; n++)
{
float multiplier = gstate_c.morphWeights[n];
if (gstate.reversenormals & 1) {
multiplier = -multiplier;
}
const s16_le *sv = (const s16_le *)(ptr_ + onesize_*n + nrmoff);
multiplier *= (1.0f/32767.0f);
for (int j = 0; j < 3; j++)
normal[j] += sv[j] * multiplier;
}
}
void VertexDecoder::Step_NormalFloatMorph() const
{
float *normal = (float *)(decoded_ + decFmt.nrmoff);
memset(normal, 0, sizeof(float)*3);
for (int n = 0; n < morphcount; n++)
{
float multiplier = gstate_c.morphWeights[n];
if (gstate.reversenormals & 1) {
multiplier = -multiplier;
}
const float_le *fv = (const float_le*)(ptr_ + onesize_*n + nrmoff);
for (int j = 0; j < 3; j++)
normal[j] += fv[j] * multiplier;
}
}
void VertexDecoder::Step_PosS8() const
{
s8 *v = (s8 *)(decoded_ + decFmt.posoff);
const s8 *sv = (const s8*)(ptr_ + posoff);
for (int j = 0; j < 3; j++)
v[j] = sv[j];
v[3] = 0;
}
void VertexDecoder::Step_PosS16() const
{
s16 *v = (s16 *)(decoded_ + decFmt.posoff);
const s16_le *sv = (const s16_le*)(ptr_ + posoff);
for (int j = 0; j < 3; j++)
v[j] = sv[j];
v[3] = 0;
}
void VertexDecoder::Step_PosFloat() const
{
float *v = (float *)(decoded_ + decFmt.posoff);
const float_le *sv = (const float_le*)(ptr_ + posoff);
v[0] = sv[0];
v[1] = sv[1];
v[2] = sv[2];
}
// Through-mode position: converts the three signed 8-bit components to float.
//
// SetVertexType declares through-mode positions as DEC_FLOAT_3 (12 bytes per
// DecFmtSize) and places the position last, so the slot ends exactly at the
// vertex stride. The old "v[3] = 0" therefore wrote four bytes past this
// vertex (into the next one, or past the decoded data for the last vertex);
// it has been removed.
void VertexDecoder::Step_PosS8Through() const
{
	float *v = (float *)(decoded_ + decFmt.posoff);
	const s8 *sv = (const s8*)(ptr_ + posoff);
	v[0] = sv[0];
	v[1] = sv[1];
	v[2] = sv[2];
}
// Through-mode position: converts the three signed 16-bit components to float.
//
// SetVertexType declares through-mode positions as DEC_FLOAT_3 (12 bytes per
// DecFmtSize) and places the position last, so the slot ends exactly at the
// vertex stride. The old "v[3] = 0" therefore wrote four bytes past this
// vertex (into the next one, or past the decoded data for the last vertex);
// it has been removed.
void VertexDecoder::Step_PosS16Through() const
{
	float *v = (float *)(decoded_ + decFmt.posoff);
	const s16_le *sv = (const s16_le*)(ptr_ + posoff);
	v[0] = sv[0];
	v[1] = sv[1];
	v[2] = sv[2];
}
// Through-mode position: copies the three float components.
//
// SetVertexType declares through-mode positions as DEC_FLOAT_3 (12 bytes per
// DecFmtSize) and places the position last, so the slot ends exactly at the
// vertex stride. The old "v[3] = 0" therefore wrote four bytes past this
// vertex (into the next one, or past the decoded data for the last vertex);
// it has been removed.
void VertexDecoder::Step_PosFloatThrough() const
{
	float *v = (float *)(decoded_ + decFmt.posoff);
	const float_le *fv = (const float_le*)(ptr_ + posoff);
	v[0] = fv[0];
	v[1] = fv[1];
	v[2] = fv[2];
}
void VertexDecoder::Step_PosS8Morph() const
{
float *v = (float *)(decoded_ + decFmt.posoff);
memset(v, 0, sizeof(float) * 3);
for (int n = 0; n < morphcount; n++) {
float multiplier = 1.0f / 127.0f;
const s8 *sv = (const s8*)(ptr_ + onesize_*n + posoff);
for (int j = 0; j < 3; j++)
v[j] += (float)sv[j] * (multiplier * gstate_c.morphWeights[n]);
}
}
void VertexDecoder::Step_PosS16Morph() const
{
float *v = (float *)(decoded_ + decFmt.posoff);
memset(v, 0, sizeof(float) * 3);
for (int n = 0; n < morphcount; n++) {
float multiplier = 1.0f / 32767.0f;
const s16_le *sv = (const s16_le*)(ptr_ + onesize_*n + posoff);
for (int j = 0; j < 3; j++)
v[j] += (float)sv[j] * (multiplier * gstate_c.morphWeights[n]);
}
}
void VertexDecoder::Step_PosFloatMorph() const
{
float *v = (float *)(decoded_ + decFmt.posoff);
memset(v, 0, sizeof(float) * 3);
for (int n = 0; n < morphcount; n++) {
const float_le *fv = (const float_le*)(ptr_ + onesize_*n + posoff);
for (int j = 0; j < 3; j++)
v[j] += fv[j] * gstate_c.morphWeights[n];
}
}
// Dispatch tables used by VertexDecoder::SetVertexType: each maps a
// component's type field from the vertex type word to the member function
// that decodes it. Entry 0 (a null pointer) corresponds to "component absent".

// Skinning weights, indexed by weighttype.
static const StepFunction wtstep[4] = {
0,
&VertexDecoder::Step_WeightsU8,
&VertexDecoder::Step_WeightsU16,
&VertexDecoder::Step_WeightsFloat,
};
// Texture coordinates, indexed by tc (normal, non-through mode).
static const StepFunction tcstep[4] = {
0,
&VertexDecoder::Step_TcU8,
&VertexDecoder::Step_TcU16,
&VertexDecoder::Step_TcFloat,
};
// Texture coordinates with UV scale/offset applied at decode time.
static const StepFunction tcstep_prescale[4] = {
0,
&VertexDecoder::Step_TcU8Prescale,
&VertexDecoder::Step_TcU16Prescale,
&VertexDecoder::Step_TcFloatPrescale,
};
// Texture coordinates in through mode.
static const StepFunction tcstep_through[4] = {
0,
&VertexDecoder::Step_TcU8,
&VertexDecoder::Step_TcU16Through,
&VertexDecoder::Step_TcFloatThrough,
};
// Some HD Remaster games double the u16 texture coordinates.
static const StepFunction tcstep_Remaster[4] = {
0,
&VertexDecoder::Step_TcU8,
&VertexDecoder::Step_TcU16Double,
&VertexDecoder::Step_TcFloat,
};
static const StepFunction tcstep_through_Remaster[4] = {
0,
&VertexDecoder::Step_TcU8,
&VertexDecoder::Step_TcU16ThroughDouble,
&VertexDecoder::Step_TcFloatThrough,
};
// TODO: Tc Morph
// Colour, indexed by the 3-bit col field; only values 4-7 have a decoder.
static const StepFunction colstep[8] = {
0, 0, 0, 0,
&VertexDecoder::Step_Color565,
&VertexDecoder::Step_Color5551,
&VertexDecoder::Step_Color4444,
&VertexDecoder::Step_Color8888,
};
// Colour when more than one morph target is present.
static const StepFunction colstep_morph[8] = {
0, 0, 0, 0,
&VertexDecoder::Step_Color565Morph,
&VertexDecoder::Step_Color5551Morph,
&VertexDecoder::Step_Color4444Morph,
&VertexDecoder::Step_Color8888Morph,
};
// Normals, indexed by nrm.
static const StepFunction nrmstep[4] = {
0,
&VertexDecoder::Step_NormalS8,
&VertexDecoder::Step_NormalS16,
&VertexDecoder::Step_NormalFloat,
};
// Normals when more than one morph target is present.
static const StepFunction nrmstep_morph[4] = {
0,
&VertexDecoder::Step_NormalS8Morph,
&VertexDecoder::Step_NormalS16Morph,
&VertexDecoder::Step_NormalFloatMorph,
};
// Positions, indexed by pos. SetVertexType indexes the position tables
// unconditionally, so pos == 0 would select the null entry.
static const StepFunction posstep[4] = {
0,
&VertexDecoder::Step_PosS8,
&VertexDecoder::Step_PosS16,
&VertexDecoder::Step_PosFloat,
};
// Positions when more than one morph target is present.
static const StepFunction posstep_morph[4] = {
0,
&VertexDecoder::Step_PosS8Morph,
&VertexDecoder::Step_PosS16Morph,
&VertexDecoder::Step_PosFloatMorph,
};
// Positions in through mode.
static const StepFunction posstep_through[4] = {
0,
&VertexDecoder::Step_PosS8Through,
&VertexDecoder::Step_PosS16Through,
&VertexDecoder::Step_PosFloatThrough,
};
// Rounds x up to the next multiple of four.
int RoundUp4(int x) {
	const int bumped = x + 3;
	// Dropping the two low bits is the same as masking with ~3.
	return bumped - (bumped & 3);
}
// Configures the decoder for vertex type word `fmt`.
// Walks the components in source order (weights, texcoord, colour, normal,
// position) and for each one present:
//   - computes its offset in the source vertex (size / *off members),
//   - appends the matching decode step to steps_,
//   - assigns its format and offset in the decoded vertex (decFmt).
// On return, onesize_ is the aligned size of one source vertex, size is
// onesize_ * morphcount, and decFmt.stride is the decoded vertex size.
void VertexDecoder::SetVertexType(u32 fmt) {
fmt_ = fmt;
throughmode = (fmt & GE_VTYPE_THROUGH) != 0;
numSteps_ = 0;
// Largest alignment seen so far. NOTE: this local shadows the member of the same name.
int biggest = 0;
size = 0;
// Unpack the per-component type fields from the vertex type word.
tc = fmt & 0x3;
col = (fmt >> 2) & 0x7;
nrm = (fmt >> 5) & 0x3;
pos = (fmt >> 7) & 0x3;
weighttype = (fmt >> 9) & 0x3;
idx = (fmt >> 11) & 0x3;
morphcount = ((fmt >> 18) & 0x7)+1;
nweights = ((fmt >> 14) & 0x7)+1;
// Running offset inside the decoded vertex.
int decOff = 0;
memset(&decFmt, 0, sizeof(decFmt));
DEBUG_LOG(G3D,"VTYPE: THRU=%i TC=%i COL=%i POS=%i NRM=%i WT=%i NW=%i IDX=%i MC=%i", (int)throughmode, tc,col,pos,nrm,weighttype,nweights,idx,morphcount);
// --- Skinning weights (always first in the source vertex) ---
if (weighttype) { // && nweights?
//size = align(size, wtalign[weighttype]); unnecessary
size += wtsize[weighttype] * nweights;
if (wtalign[weighttype] > biggest)
biggest = wtalign[weighttype];
steps_[numSteps_++] = wtstep[weighttype];
int fmtBase = DEC_FLOAT_1;
if (weighttype == GE_VTYPE_WEIGHT_8BIT >> GE_VTYPE_WEIGHT_SHIFT) {
fmtBase = DEC_U8_1;
} else if (weighttype == GE_VTYPE_WEIGHT_16BIT >> GE_VTYPE_WEIGHT_SHIFT) {
fmtBase = DEC_U16_1;
} else if (weighttype == GE_VTYPE_WEIGHT_FLOAT >> GE_VTYPE_WEIGHT_SHIFT) {
fmtBase = DEC_FLOAT_1;
}
// The DEC_*_1..DEC_*_4 enumerators are consecutive, so adding (count - 1)
// to the base picks the N-component variant.
int numWeights = TranslateNumBones(nweights);
if (numWeights <= 4) {
decFmt.w0off = decOff;
decFmt.w0fmt = fmtBase + numWeights - 1;
decOff += DecFmtSize(decFmt.w0fmt);
} else {
// More than four weights: a full first group plus a second group with the rest.
decFmt.w0off = decOff;
decFmt.w0fmt = fmtBase + 3;
decOff += DecFmtSize(decFmt.w0fmt);
decFmt.w1off = decOff;
decFmt.w1fmt = fmtBase + numWeights - 5;
decOff += DecFmtSize(decFmt.w1fmt);
}
}
// --- Texture coordinates ---
if (tc) {
size = align(size, tcalign[tc]);
tcoff = size;
size += tcsize[tc];
if (tcalign[tc] > biggest)
biggest = tcalign[tc];
// Prescale path: UVs are decoded straight to scaled floats.
if (g_Config.bPrescaleUV && !throughmode && gstate.getTextureFunction() == 0) {
steps_[numSteps_++] = tcstep_prescale[tc];
decFmt.uvfmt = DEC_FLOAT_2;
} else {
if (g_DoubleTextureCoordinates)
steps_[numSteps_++] = throughmode ? tcstep_through_Remaster[tc] : tcstep_Remaster[tc];
else
steps_[numSteps_++] = throughmode ? tcstep_through[tc] : tcstep[tc];
switch (tc) {
case GE_VTYPE_TC_8BIT >> GE_VTYPE_TC_SHIFT:
decFmt.uvfmt = throughmode ? DEC_U8A_2 : DEC_U8_2;
break;
case GE_VTYPE_TC_16BIT >> GE_VTYPE_TC_SHIFT:
decFmt.uvfmt = throughmode ? DEC_U16A_2 : DEC_U16_2;
break;
case GE_VTYPE_TC_FLOAT >> GE_VTYPE_TC_SHIFT:
decFmt.uvfmt = DEC_FLOAT_2;
break;
}
}
decFmt.uvoff = decOff;
decOff += DecFmtSize(decFmt.uvfmt);
}
// --- Colour ---
if (col) {
size = align(size, colalign[col]);
coloff = size;
size += colsize[col];
if (colalign[col] > biggest)
biggest = colalign[col];
steps_[numSteps_++] = morphcount == 1 ? colstep[col] : colstep_morph[col];
// All color formats decode to DEC_U8_4 currently.
// They can become floats later during transform though.
decFmt.c0fmt = DEC_U8_4;
decFmt.c0off = decOff;
decOff += DecFmtSize(decFmt.c0fmt);
} else {
coloff = 0;
}
// --- Normal ---
if (nrm) {
size = align(size, nrmalign[nrm]);
nrmoff = size;
size += nrmsize[nrm];
if (nrmalign[nrm] > biggest)
biggest = nrmalign[nrm];
steps_[numSteps_++] = morphcount == 1 ? nrmstep[nrm] : nrmstep_morph[nrm];
if (morphcount == 1) {
// The normal formats match the gl formats perfectly, let's use 'em.
switch (nrm) {
case GE_VTYPE_NRM_8BIT >> GE_VTYPE_NRM_SHIFT: decFmt.nrmfmt = DEC_S8_3; break;
case GE_VTYPE_NRM_16BIT >> GE_VTYPE_NRM_SHIFT: decFmt.nrmfmt = DEC_S16_3; break;
case GE_VTYPE_NRM_FLOAT >> GE_VTYPE_NRM_SHIFT: decFmt.nrmfmt = DEC_FLOAT_3; break;
}
} else {
// Morphed normals are blended in float by the *Morph steps.
decFmt.nrmfmt = DEC_FLOAT_3;
}
// Actually, temporarily let's not.
decFmt.nrmoff = decOff;
decOff += DecFmtSize(decFmt.nrmfmt);
}
// --- Position (last component of both the source and the decoded vertex) ---
//if (pos) - there's always a position
{
size = align(size, posalign[pos]);
posoff = size;
size += possize[pos];
if (posalign[pos] > biggest)
biggest = posalign[pos];
if (throughmode) {
// Through-mode positions are always decoded to three floats.
steps_[numSteps_++] = posstep_through[pos];
decFmt.posfmt = DEC_FLOAT_3;
} else {
steps_[numSteps_++] = morphcount == 1 ? posstep[pos] : posstep_morph[pos];
if (morphcount == 1) {
// The non-through-mode position formats match the gl formats perfectly, let's use 'em.
switch (pos) {
case GE_VTYPE_POS_8BIT >> GE_VTYPE_POS_SHIFT: decFmt.posfmt = DEC_S8_3; break;
case GE_VTYPE_POS_16BIT >> GE_VTYPE_POS_SHIFT: decFmt.posfmt = DEC_S16_3; break;
case GE_VTYPE_POS_FLOAT >> GE_VTYPE_POS_SHIFT: decFmt.posfmt = DEC_FLOAT_3; break;
}
} else {
// Actually, temporarily let's not.
decFmt.posfmt = DEC_FLOAT_3;
}
}
decFmt.posoff = decOff;
decOff += DecFmtSize(decFmt.posfmt);
}
decFmt.stride = decOff;
// Pad one source vertex to its largest component alignment; morph targets
// are stored as morphcount consecutive copies of that size.
size = align(size, biggest);
onesize_ = size;
size *= morphcount;
DEBUG_LOG(G3D,"SVT : size = %i, aligned to biggest %i", size, biggest);
}
void GetIndexBounds(void *inds, int count, u32 vertType, u16 *indexLowerBound, u16 *indexUpperBound) {
// Find index bounds. Could cache this in display lists.
// Also, this could be greatly sped up with SSE2/NEON, although rarely a bottleneck.
int lowerBound = 0x7FFFFFFF;
int upperBound = 0;
u32 idx = vertType & GE_VTYPE_IDX_MASK;
if (idx == GE_VTYPE_IDX_8BIT) {
const u8 *ind8 = (const u8 *)inds;
for (int i = 0; i < count; i++) {
if (ind8[i] > upperBound)
upperBound = ind8[i];
if (ind8[i] < lowerBound)
lowerBound = ind8[i];
}
} else if (idx == GE_VTYPE_IDX_16BIT) {
const u16 *ind16 = (const u16*)inds;
for (int i = 0; i < count; i++) {
if (ind16[i] > upperBound)
upperBound = ind16[i];
if (ind16[i] < lowerBound)
lowerBound = ind16[i];
}
} else {
lowerBound = 0;
upperBound = count - 1;
}
*indexLowerBound = (u16)lowerBound;
*indexUpperBound = (u16)upperBound;
}
// Decodes source vertices indexLowerBound..indexUpperBound (inclusive) from
// `verts` into `decodedptr`, running every configured step once per vertex.
void VertexDecoder::DecodeVerts(u8 *decodedptr, const void *verts, int indexLowerBound, int indexUpperBound) const {
	// Decode the vertices within the found bounds, once each
	// decoded_ and ptr_ are used in the steps, so can't be turned into locals for speed.
	const int outStride = decFmt.stride;
	decoded_ = decodedptr;
	ptr_ = (const u8*)verts + indexLowerBound * size;

	int vertex = indexLowerBound;
	while (vertex <= indexUpperBound) {
		for (int step = 0; step < numSteps_; step++)
			(this->*steps_[step])();
		// Advance both cursors to the next vertex.
		ptr_ += size;
		decoded_ += outStride;
		vertex++;
	}
}
// Rewrites `count` source vertices into `decoded` using a vertex type equal
// to the current gstate.vertType with float texture coordinates, copying the
// position, normal and colour and taking the UVs from `customuv` (two floats
// per vertex). Returns the vertex type of the rewritten data.
// TODO: Does not support morphs, skinning etc.
u32 VertexDecoder::InjectUVs(u8 *decoded, const void *verts, float *customuv, int count) const {
	const u32 customVertType = (gstate.vertType & ~GE_VTYPE_TC_MASK) | GE_VTYPE_TC_FLOAT;

	// A throwaway decoder supplies the component offsets of the output layout.
	VertexDecoder decOut;
	decOut.SetVertexType(customVertType);

	const u8 *src = (const u8 *)verts;
	u8 *dst = decoded;
	int vertex = 0;
	while (vertex < count) {
		if (pos)
			memcpy(dst + decOut.posoff, src + posoff, possize[pos]);
		if (nrm)
			memcpy(dst + decOut.nrmoff, src + nrmoff, nrmsize[nrm]);
		if (col)
			memcpy(dst + decOut.coloff, src + coloff, colsize[col]);
		// Ignore others for now, this is all we need for puzbob.

		// Inject!
		memcpy(dst + decOut.tcoff, &customuv[vertex * 2], tcsize[decOut.tc]);

		src += this->onesize_;
		dst += decOut.onesize_;
		vertex++;
	}
	return customVertType;
}
// Writes a short human-readable description of this decoder's vertex format
// into `output` (unbounded sprintf - the caller must supply a large enough
// buffer) and returns the number of characters written.
int VertexDecoder::ToString(char *output) const {
	char *cursor = output;

	cursor += sprintf(cursor, "P: %i ", pos);
	if (nrm) {
		cursor += sprintf(cursor, "N: %i ", nrm);
	}
	if (col) {
		cursor += sprintf(cursor, "C: %i ", col);
	}
	if (tc) {
		cursor += sprintf(cursor, "T: %i ", tc);
	}
	if (weighttype) {
		cursor += sprintf(cursor, "W: %i ", weighttype);
	}
	if (idx) {
		cursor += sprintf(cursor, "I: %i ", idx);
	}
	if (morphcount > 1) {
		cursor += sprintf(cursor, "Morph: %i ", morphcount);
	}
	cursor += sprintf(cursor, "Verts: %i ", stats_[STAT_VERTSSUBMITTED]);
	if (throughmode) {
		cursor += sprintf(cursor, " (through)");
	}
	cursor += sprintf(cursor, " (size: %i)", VertexSize());

	return cursor - output;
}

View File

@ -0,0 +1,437 @@
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#pragma once
#include "../GPUState.h"
#include "../Globals.h"
#include "base/basictypes.h"
#include "Core/Reporting.h"
// DecVtxFormat - vertex formats for PC
// Kind of like a D3D VertexDeclaration.
// Can write code to easily bind these using OpenGL, or read these manually.
// No morph support, that is taken care of by the VertexDecoder.
//
// Component format identifiers. The order matters: SetVertexType computes
// "base + count - 1" on the _1.._4 runs, so those must stay consecutive.
// DecFmtSize() gives the storage size of each.
enum {
DEC_NONE,
DEC_FLOAT_1,
DEC_FLOAT_2,
DEC_FLOAT_3,
DEC_FLOAT_4,
DEC_S8_3,
DEC_S16_3,
DEC_U8_1,
DEC_U8_2,
DEC_U8_3,
DEC_U8_4,
DEC_U16_1,
DEC_U16_2,
DEC_U16_3,
DEC_U16_4,
// Used for through-mode 8/16-bit texture coordinates (see SetVertexType).
DEC_U8A_2,
DEC_U16A_2,
};
// Returns the storage size in bytes of one component of format `fmt` (a DEC_* value).
int DecFmtSize(u8 fmt);
// Layout of one decoded vertex: for each component a DEC_* format
// (DEC_NONE = absent) and its byte offset, plus the total stride.
struct DecVtxFormat {
u8 w0fmt; u8 w0off; // first 4 weights
u8 w1fmt; u8 w1off; // second 4 weights
u8 uvfmt; u8 uvoff;
u8 c0fmt; u8 c0off; // First color
u8 c1fmt; u8 c1off;
u8 nrmfmt; u8 nrmoff;
u8 posfmt; u8 posoff;
short stride; // size in bytes of one decoded vertex
};
// Vertex record held in TransformDrawEngine's transformed / transformedExpanded buffers.
struct TransformedVertex
{
float x, y, z, fog; // in case of morph, preblend during decode
float u; float v; float w; // scaled by uscale, vscale, if there
u8 color0[4]; // prelit
u8 color1[4]; // prelit
};
// NOTE(review): the only definition of GetTransformedVtxFormat visible in the
// accompanying .cpp is inside an #if 0 block - confirm nothing calls this.
DecVtxFormat GetTransformedVtxFormat(const DecVtxFormat &fmt);
class VertexDecoder;
// A decode step: a const VertexDecoder member function taking no arguments.
typedef void (VertexDecoder::*StepFunction)() const;
// Computes the lowest and highest vertex index referenced by a draw call.
void GetIndexBounds(void *inds, int count, u32 vertType, u16 *indexLowerBound, u16 *indexUpperBound);
// Indices into VertexDecoder::stats_.
enum {
STAT_VERTSSUBMITTED = 0,
NUM_VERTEX_DECODER_STATS = 1
};
// Right now
// - compiles into list of called functions
// Future TODO
// - will compile into lighting fast specialized x86 and ARM
class VertexDecoder
{
public:
VertexDecoder() : coloff(0), nrmoff(0), posoff(0) {}
~VertexDecoder() {}
// prim is needed knowledge for a performance hack (PrescaleUV)
void SetVertexType(u32 vtype);
u32 VertexType() const { return fmt_; }
const DecVtxFormat &GetDecVtxFmt() { return decFmt; }
void DecodeVerts(u8 *decoded, const void *verts, int indexLowerBound, int indexUpperBound) const;
// This could be easily generalized to inject any one component. Don't know another use for it though.
u32 InjectUVs(u8 *decoded, const void *verts, float *customuv, int count) const;
bool hasColor() const { return col != 0; }
int VertexSize() const { return size; }
void Step_WeightsU8() const;
void Step_WeightsU16() const;
void Step_WeightsFloat() const;
void Step_TcU8() const;
void Step_TcU16() const;
void Step_TcFloat() const;
void Step_TcU8Prescale() const;
void Step_TcU16Prescale() const;
void Step_TcFloatPrescale() const;
void Step_TcU16Double() const;
void Step_TcU16Through() const;
void Step_TcU16ThroughDouble() const;
void Step_TcFloatThrough() const;
// TODO: tcmorph
void Step_Color4444() const;
void Step_Color565() const;
void Step_Color5551() const;
void Step_Color8888() const;
void Step_Color4444Morph() const;
void Step_Color565Morph() const;
void Step_Color5551Morph() const;
void Step_Color8888Morph() const;
void Step_NormalS8() const;
void Step_NormalS16() const;
void Step_NormalFloat() const;
void Step_NormalS8Morph() const;
void Step_NormalS16Morph() const;
void Step_NormalFloatMorph() const;
void Step_PosS8() const;
void Step_PosS16() const;
void Step_PosFloat() const;
void Step_PosS8Morph() const;
void Step_PosS16Morph() const;
void Step_PosFloatMorph() const;
void Step_PosS8Through() const;
void Step_PosS16Through() const;
void Step_PosFloatThrough() const;
void ResetStats() {
memset(stats_, 0, sizeof(stats_));
}
void IncrementStat(int stat, int amount) {
stats_[stat] += amount;
}
// output must be big for safety.
// Returns number of chars written.
// Ugly for speed.
int ToString(char *output) const;
// Mutable decoder state
mutable u8 *decoded_;
mutable const u8 *ptr_;
// "Immutable" state, set at startup
// The decoding steps
StepFunction steps_[5];
int numSteps_;
u32 fmt_;
DecVtxFormat decFmt;
bool throughmode;
int biggest;
int size;
int onesize_;
int weightoff;
int tcoff;
int coloff;
int nrmoff;
int posoff;
int tc;
int col;
int nrm;
int pos;
int weighttype;
int idx;
int morphcount;
int nweights;
int stats_[NUM_VERTEX_DECODER_STATS];
};
// Reads decoded vertex formats in a convenient way. For software transform and debugging.
// The reader does not own the buffer: it keeps the base pointer it was given plus a copy of
// the format description, and Goto() selects which vertex the Read* calls operate on.
class VertexReader
{
public:
// base: start of the decoded vertex buffer. decFmt: layout of one decoded vertex (copied).
// vtype: vertex type word; only the GE_VTYPE_THROUGH bit is inspected (see isThrough()).
// The reader starts positioned on vertex 0.
VertexReader(u8 *base, const DecVtxFormat &decFmt, int vtype) : base_(base), data_(base), decFmt_(decFmt), vtype_(vtype) {}
// Reads the position of the current vertex into pos[0..2] as floats.
// Non-through S16/S8 data is scaled by 1/32767 resp. 1/127; in through mode X/Y are taken
// as-is and Z is treated as unsigned and scaled to [0, 1]. On an unsupported format an
// error is logged and pos is left untouched.
void ReadPos(float pos[3]) const {
switch (decFmt_.posfmt) {
case DEC_FLOAT_3:
{
const float *f = (const float *)(data_ + decFmt_.posoff);
memcpy(pos, f, 12);
if (isThrough()) {
// Integer value passed in a float. Wraps and all, required for Monster Hunter.
pos[2] = (float)((u16)(s32)pos[2]) * (1.0f / 65535.0f);
}
}
break;
case DEC_S16_3:
{
// X and Y are signed 16 bit, Z is unsigned 16 bit
const s16 *s = (const s16 *)(data_ + decFmt_.posoff);
const u16 *u = (const u16 *)(data_ + decFmt_.posoff);
if (isThrough()) {
for (int i = 0; i < 2; i++)
pos[i] = s[i];
pos[2] = u[2] * (1.0f / 65535.0f);
} else {
for (int i = 0; i < 3; i++)
pos[i] = s[i] * (1.f / 32767.f);
}
}
break;
case DEC_S8_3:
{
// X and Y are signed 8 bit, Z is unsigned 8 bit
const s8 *b = (const s8 *)(data_ + decFmt_.posoff);
const u8 *u = (const u8 *)(data_ + decFmt_.posoff);
if (isThrough()) {
for (int i = 0; i < 2; i++)
pos[i] = b[i];
pos[2] = u[2] / 255.0f;
} else {
for (int i = 0; i < 3; i++)
pos[i] = b[i] * (1.f / 127.f);
}
}
break;
default:
ERROR_LOG(G3D, "Reader: Unsupported Pos Format");
break;
}
}
// Reads the normal of the current vertex into nrm[0..2] as floats
// (S16 scaled by 1/32767, S8 by 1/127). Unsupported formats are logged and nrm is left untouched.
void ReadNrm(float nrm[3]) const {
switch (decFmt_.nrmfmt) {
case DEC_FLOAT_3:
//memcpy(nrm, data_ + decFmt_.nrmoff, 12);
{
const float *f = (const float *)(data_ + decFmt_.nrmoff);
for (int i = 0; i < 3; i++)
nrm[i] = f[i] ;
}
break;
case DEC_S16_3:
{
const s16 *s = (const s16 *)(data_ + decFmt_.nrmoff);
for (int i = 0; i < 3; i++)
nrm[i] = s[i] * (1.f / 32767.f);
}
break;
case DEC_S8_3:
{
const s8 *b = (const s8 *)(data_ + decFmt_.nrmoff);
for (int i = 0; i < 3; i++)
nrm[i] = b[i] * (1.f / 127.f);
}
break;
default:
ERROR_LOG(G3D, "Reader: Unsupported Nrm Format");
break;
}
}
// Reads the texture coordinate of the current vertex into uv[0..1].
// U8 is scaled by 1/128 and U16 by 1/32768; DEC_U16A_2 values are converted to float
// without scaling. Unsupported formats are logged and uv is left untouched.
void ReadUV(float uv[2]) const {
switch (decFmt_.uvfmt) {
case DEC_U8_2:
{
const u8 *b = (const u8 *)(data_ + decFmt_.uvoff);
uv[0] = b[0] * (1.f / 128.f);
uv[1] = b[1] * (1.f / 128.f);
}
break;
case DEC_U16_2:
{
const u16 *s = (const u16 *)(data_ + decFmt_.uvoff);
uv[0] = s[0] * (1.f / 32768.f);
uv[1] = s[1] * (1.f / 32768.f);
}
break;
case DEC_FLOAT_2:
{
const float *f = (const float *)(data_ + decFmt_.uvoff);
uv[0] = f[0];
uv[1] = f[1];
}
break;
case DEC_U16A_2:
{
const u16 *p = (const u16 *)(data_ + decFmt_.uvoff);
uv[0] = (float)p[0];
uv[1] = (float)p[1];
}
break;
default:
ERROR_LOG(G3D, "Reader: Unsupported UV Format");
break;
}
}
// Reads color 0 of the current vertex into color[0..3] as floats
// (U8 components scaled by 1/255, float components copied verbatim).
void ReadColor0(float color[4]) const {
switch (decFmt_.c0fmt) {
case DEC_U8_4:
{
const u8 *b = (const u8 *)(data_ + decFmt_.c0off);
for (int i = 0; i < 4; i++)
color[i] = b[i] * (1.f / 255.f);
}
break;
case DEC_FLOAT_4:
memcpy(color, data_ + decFmt_.c0off, 16);
break;
default:
ERROR_LOG(G3D, "Reader: Unsupported C0 Format");
break;
}
}
// Reads color 1 of the current vertex. Only the first three components are read,
// even though the stored formats have four.
void ReadColor1(float color[3]) const {
switch (decFmt_.c1fmt) {
case DEC_U8_4:
{
const u8 *b = (const u8 *)(data_ + decFmt_.c1off);
for (int i = 0; i < 3; i++)
color[i] = b[i] * (1.f / 255.f);
}
break;
case DEC_FLOAT_4:
memcpy(color, data_ + decFmt_.c1off, 12);
break;
default:
ERROR_LOG(G3D, "Reader: Unsupported C1 Format");
break;
}
}
// Reads the weights of the current vertex. The w0 attribute fills weights[0..3] and the
// optional w1 attribute fills weights[4..7]; only as many entries as the stored format has
// components are written, the rest of the array is left untouched.
// U8 weights are scaled by 1/128 and U16 weights by 1/32768.
void ReadWeights(float weights[8]) const {
const float *f = (const float *)(data_ + decFmt_.w0off);
const u8 *b = (const u8 *)(data_ + decFmt_.w0off);
const u16 *s = (const u16 *)(data_ + decFmt_.w0off);
switch (decFmt_.w0fmt) {
case DEC_FLOAT_1:
case DEC_FLOAT_2:
case DEC_FLOAT_3:
case DEC_FLOAT_4:
// Relies on DEC_FLOAT_1..DEC_FLOAT_4 being consecutive enum values.
for (int i = 0; i <= decFmt_.w0fmt - DEC_FLOAT_1; i++)
weights[i] = f[i];
break;
case DEC_U8_1: weights[0] = b[0] * (1.f / 128.f); break;
case DEC_U8_2: for (int i = 0; i < 2; i++) weights[i] = b[i] * (1.f / 128.f); break;
case DEC_U8_3: for (int i = 0; i < 3; i++) weights[i] = b[i] * (1.f / 128.f); break;
case DEC_U8_4: for (int i = 0; i < 4; i++) weights[i] = b[i] * (1.f / 128.f); break;
case DEC_U16_1: weights[0] = s[0] * (1.f / 32768.f); break;
case DEC_U16_2: for (int i = 0; i < 2; i++) weights[i] = s[i] * (1.f / 32768.f); break;
case DEC_U16_3: for (int i = 0; i < 3; i++) weights[i] = s[i] * (1.f / 32768.f); break;
case DEC_U16_4: for (int i = 0; i < 4; i++) weights[i] = s[i] * (1.f / 32768.f); break;
default:
ERROR_LOG(G3D, "Reader: Unsupported W0 Format");
break;
}
// Second weight attribute: same decoding, written at offset 4 in the output.
f = (const float *)(data_ + decFmt_.w1off);
b = (const u8 *)(data_ + decFmt_.w1off);
s = (const u16 *)(data_ + decFmt_.w1off);
switch (decFmt_.w1fmt) {
case 0:
// It's fine for there to be w0 weights but not w1.
break;
case DEC_FLOAT_1:
case DEC_FLOAT_2:
case DEC_FLOAT_3:
case DEC_FLOAT_4:
for (int i = 0; i <= decFmt_.w1fmt - DEC_FLOAT_1; i++)
weights[i+4] = f[i];
break;
case DEC_U8_1: weights[4] = b[0] * (1.f / 128.f); break;
case DEC_U8_2: for (int i = 0; i < 2; i++) weights[i+4] = b[i] * (1.f / 128.f); break;
case DEC_U8_3: for (int i = 0; i < 3; i++) weights[i+4] = b[i] * (1.f / 128.f); break;
case DEC_U8_4: for (int i = 0; i < 4; i++) weights[i+4] = b[i] * (1.f / 128.f); break;
case DEC_U16_1: weights[4] = s[0] * (1.f / 32768.f); break;
case DEC_U16_2: for (int i = 0; i < 2; i++) weights[i+4] = s[i] * (1.f / 32768.f); break;
case DEC_U16_3: for (int i = 0; i < 3; i++) weights[i+4] = s[i] * (1.f / 32768.f); break;
case DEC_U16_4: for (int i = 0; i < 4; i++) weights[i+4] = s[i] * (1.f / 32768.f); break;
default:
ERROR_LOG(G3D, "Reader: Unsupported W1 Format");
break;
}
}
// Attribute presence checks: a format value of 0 means the attribute is absent.
bool hasColor0() const { return decFmt_.c0fmt != 0; }
bool hasColor1() const { return decFmt_.c1fmt != 0; }
bool hasNormal() const { return decFmt_.nrmfmt != 0; }
bool hasUV() const { return decFmt_.uvfmt != 0; }
// True when the vertex type has the GE_VTYPE_THROUGH bit set.
bool isThrough() const { return (vtype_ & GE_VTYPE_THROUGH) != 0; }
// Positions the reader on vertex `index` (index * stride bytes from the base pointer).
// No bounds checking is done.
void Goto(int index) {
data_ = base_ + index * decFmt_.stride;
}
private:
u8 *base_;  // start of the decoded vertex buffer (not owned)
u8 *data_;  // start of the currently selected vertex
DecVtxFormat decFmt_;
int vtype_;
};
// Debugging utilities
void PrintDecodedVertex(VertexReader &vtx);

View File

@ -0,0 +1,251 @@
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include <stdio.h>
#include <locale.h>
#if defined(_WIN32) && defined(_DEBUG)
#include "Common/CommonWindows.h"
#endif
#include "base/stringutil.h"
#include "GPU/ge_constants.h"
#include "GPU/GPUState.h"
#include "Core/Config.h"
#include "GPU/Directx9/VertexShaderGenerator.h"
#undef WRITE
#define WRITE p+=sprintf
// Hardware transform can only be used when it is enabled in the configuration, and never
// for through-mode draws or rectangle primitives.
bool CanUseHardwareTransform(int prim) {
	if (g_Config.bHardwareTransform) {
		const bool through = gstate.isModeThrough();
		return !through && prim != GE_PRIM_RECTANGLES;
	}
	return false;
}
// Rounds a bone count up to one of the shader variants that actually get generated:
// 0 stays 0, anything else below 4 becomes 4, larger counts are passed through unchanged.
int TranslateNumBones(int bones) {
	// Also collapsing everything below 8 to 8 was tried, but:
	// if (bones < 8) return 8;   I get drawing problems in FF:CC with this!
	if (bones == 0)
		return 0;
	return bones < 4 ? 4 : bones;
}
// Packs the parts of the current GE state that select a vertex shader variant into the
// two words of *id. Both words are always cleared first; the hardware-transform specific
// bits (including all of d[1]) are only filled in when useHWTransform is set.
// prim so we can special case for RECTANGLES :(
void ComputeVertexShaderID(VertexShaderID *id, int prim, bool useHWTransform) {
	const u32 vertType = gstate.vertType;

	int doTexture = gstate.isTextureMapEnabled() && !gstate.isModeClear();
	bool doTextureProjection = gstate.getUVGenMode() == 1;

	bool hasColor = (vertType & GE_VTYPE_COL_MASK) != 0;
	bool hasNormal = (vertType & GE_VTYPE_NRM_MASK) != 0;
	bool hasBones = (vertType & GE_VTYPE_WEIGHT_MASK) != 0;

	bool enableFog = gstate.isFogEnabled() && !gstate.isModeThrough() && !gstate.isModeClear();
	bool lmode = gstate.isUsingSecondaryColor() && gstate.isLightingEnabled();

	memset(id->d, 0, sizeof(id->d));
	u32 &low = id->d[0];
	u32 &high = id->d[1];

	// Bits 0-4: general flags.
	low = lmode & 1;
	low |= ((int)gstate.isModeThrough()) << 1;
	low |= ((int)enableFog) << 2;
	low |= doTexture << 3;
	low |= (hasColor & 1) << 4;

	// Bits 5-6 only matter while texturing.
	if (doTexture) {
		low |= (gstate_c.flipTexture & 1) << 5;
		low |= (doTextureProjection & 1) << 6;
	}

	// Everything below describes hardware transform state.
	if (!useHWTransform)
		return;

	low |= 1 << 8;
	low |= (hasNormal & 1) << 9;

	// UV generation mode
	low |= gstate.getUVGenMode() << 16;

	// The next bits are used differently depending on UVgen mode
	switch (gstate.getUVGenMode()) {
	case 1:
		low |= gstate.getUVProjMode() << 18;
		break;
	case 2:
		low |= gstate.getUVLS0() << 18;
		low |= gstate.getUVLS1() << 20;
		break;
	default:
		break;
	}

	// Bones
	if (hasBones)
		low |= (TranslateNumBones(gstate.getNumBoneWeights()) - 1) << 22;

	// Okay, d[1] coming up. ==============
	const bool needLightBits = gstate.isLightingEnabled() || gstate.getUVGenMode() == 2;
	if (needLightBits) {
		// Light bits: four bits per light (computation in the low two, type in the high two).
		for (int light = 0; light < 4; light++) {
			high |= gstate.getLightComputation(light) << (light * 4);
			high |= gstate.getLightType(light) << (light * 4 + 2);
		}
		high |= (gstate.materialupdate & 7) << 16;
		for (int light = 0; light < 4; light++) {
			high |= (gstate.isLightChanEnabled(light) & 1) << (20 + light);
		}
	}
	high |= gstate.isLightingEnabled() << 24;
	high |= ((vertType & GE_VTYPE_WEIGHT_MASK) >> GE_VTYPE_WEIGHT_SHIFT) << 25;
}
// GLSL attribute declarations for 1..8 bone weights (entry i declares i+1 weights, split
// over a_w1 and a_w2 once more than four are needed).
// NOTE(review): these are GLSL strings, not HLSL, and nothing in this file references the
// table - presumably left over from the OpenGL generator this file was ported from.
static const char * const boneWeightAttrDecl[8] = {
"attribute mediump float a_w1;\n",
"attribute mediump vec2 a_w1;\n",
"attribute mediump vec3 a_w1;\n",
"attribute mediump vec4 a_w1;\n",
"attribute mediump vec4 a_w1;\nattribute mediump float a_w2;\n",
"attribute mediump vec4 a_w1;\nattribute mediump vec2 a_w2;\n",
"attribute mediump vec4 a_w1;\nattribute mediump vec3 a_w2;\n",
"attribute mediump vec4 a_w1;\nattribute mediump vec4 a_w2;\n",
};
// How much work the vertex shader has to do for one of the four lights.
enum DoLightComputation {
LIGHT_OFF,   // light is not used at all
LIGHT_SHADE, // light is only referenced by UV generation mode 2 (getUVLS0/getUVLS1)
LIGHT_FULL,  // lighting is enabled and this light's channel is enabled
};
#if 0 // used for debugging
// Debug-only variant (compiled out by the surrounding #if 0): ignores the GE state and
// always writes the same fixed pass-through HLSL vertex shader into buffer.
// buffer must be large enough for the whole string; prim and useHWTransform are unused.
void GenerateVertexShader(int prim, char *buffer, bool useHWTransform) {
const char * vscode =
" float4x4 u_proj : register(c0); "
" "
" struct VS_IN "
" "
" { "
" float4 ObjPos : POSITION; "
" float3 Uv : TEXCOORD0; "
" float4 C1 : COLOR0; " // First vertex color (COLOR0)
" float4 C2 : COLOR1; " // Second vertex color (COLOR1)
" }; "
" "
" struct VS_OUT "
" { "
" float4 ObjPos : POSITION; "
" float4 Uv : TEXCOORD0; "
" float4 C1 : COLOR0; " // First vertex color (COLOR0)
" float4 C2 : COLOR1; " // Second vertex color (COLOR1)
" }; "
" "
" VS_OUT main( VS_IN In ) "
" { "
" VS_OUT Out; "
" Out.ObjPos = mul( float4(In.ObjPos.xyz, 1), u_proj ); " // Transform the position (w forced to 1) by u_proj
" Out.Uv = float4(In.Uv.xy, 0, In.Uv.z); "
" Out.C1 = In.C1; "
" Out.C2 = In.C2; "
" return Out; " // Colors were copied through unchanged above
" } ";
strcpy(buffer, vscode);
}
#else
// Writes the HLSL source of the vertex shader matching the current GE state into buffer
// (NUL terminated). The caller must supply a buffer large enough for the whole shader;
// no size is passed in, so nothing is checked here.
//
// Only a pass-through shader is generated so far: the position is multiplied by the
// (through-mode or regular) projection matrix, the UV is repacked, and the colors are either
// copied from the vertex or replaced by the material ambient/alpha uniform. Lighting,
// skinning and UV generation are not implemented, so the locals that used to be computed
// for them (and never read) have been dropped. prim is currently unused.
void GenerateVertexShader(int prim, char *buffer, bool useHWTransform) {
	char *p = buffer;
	const u32 vertType = gstate.vertType;

	// Software-transformed vertices always carry colors.
	bool hasColor = (vertType & GE_VTYPE_COL_MASK) != 0 || !useHWTransform;
	bool enableFog = gstate.isFogEnabled() && !gstate.isModeThrough() && !gstate.isModeClear();

	// Uniforms.
	if (gstate.isModeThrough()) {
		WRITE(p, "float4x4 u_proj_through;\n");
	} else {
		WRITE(p, "float4x4 u_proj;\n");
		// Add all the uniforms we'll need to transform properly.
	}
	if (useHWTransform || !hasColor)
		WRITE(p, "float4 u_matambientalpha;\n");  // matambient + matalpha

	// Input layout.
	WRITE(p, " struct VS_IN ");
	WRITE(p, " ");
	WRITE(p, " { ");
	WRITE(p, " float4 ObjPos : POSITION; ");
	WRITE(p, " float3 Uv : TEXCOORD0; ");
	WRITE(p, " float4 C1 : COLOR0; ");
	WRITE(p, " float4 C2 : COLOR1; ");
	WRITE(p, " }; ");
	WRITE(p, " ");

	// Output layout.
	WRITE(p, " struct VS_OUT ");
	WRITE(p, " { ");
	WRITE(p, " float4 ObjPos : POSITION; ");
	WRITE(p, " float4 Uv : TEXCOORD0; ");
	WRITE(p, " float4 C1 : COLOR0; ");
	WRITE(p, " float4 C2 : COLOR1; ");
	if (enableFog) {
		WRITE(p, "float v_fogdepth:FOG;\n");
	}
	WRITE(p, " }; ");
	WRITE(p, " ");

	// Entry point.
	WRITE(p, " VS_OUT main( VS_IN In ) ");
	WRITE(p, " { ");
	WRITE(p, " VS_OUT Out; ");

	// Simple pass-through of vertex data to fragment shader
	if (gstate.isModeThrough()) {
		WRITE(p, "Out.ObjPos = mul( float4(In.ObjPos.xyz, 1), u_proj_through );");
		//WRITE(p, "Out.ObjPos.z = ((1+Out.ObjPos.z)/2);"); // Dx z versus opengl z
	} else {
		//WRITE(p, " Out.ObjPos = mul( u_proj, float4(In.ObjPos.xyz, 1) );");
		WRITE(p, "Out.ObjPos = mul( float4(In.ObjPos.xyz, 1), u_proj );");
		//WRITE(p, "Out.ObjPos.z = ((1+Out.ObjPos.z)/2);"); // Dx z versus opengl z
	}

	// The third input UV component ends up in w of the output.
	//WRITE(p, "Out.Uv = In.Uv;");
	WRITE(p, "Out.Uv = float4(In.Uv.xy, 0, In.Uv.z);");

	if (hasColor) {
		WRITE(p, "Out.C1 = In.C1;");
		WRITE(p, "Out.C2 = In.C2;");
	} else {
		WRITE(p, " Out.C1 = u_matambientalpha;\n");
		WRITE(p, " Out.C2 = float4(0,0,0,0);\n");
	}

	if (enableFog) {
		WRITE(p, " Out.v_fogdepth = In.ObjPos.w;\n");
	}
	WRITE(p, " return Out; ");
	WRITE(p, "}\n");
}
#endif

View File

@ -0,0 +1,57 @@
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#pragma once
#include "Globals.h"
// #define USE_BONE_ARRAY
// Key identifying one generated vertex shader variant (filled in by ComputeVertexShaderID).
// Usable as a std::map key (operator <) and comparable for equality.
struct VertexShaderID
{
	// A fresh ID starts out in the "cleared" state, see clear().
	VertexShaderID() { clear(); }

	// Sets every word to 0xFFFFFFFF.
	// Both words must be written: operator < and operator == read all of d, so leaving
	// d[1] untouched (as the previous implementation did) made comparisons of cleared or
	// freshly constructed IDs depend on indeterminate memory.
	void clear() {
		for (size_t i = 0; i < sizeof(d) / sizeof(u32); i++)
			d[i] = 0xFFFFFFFF;
	}

	u32 d[2];

	// Lexicographic ordering over the words of d, d[0] being most significant.
	bool operator < (const VertexShaderID &other) const
	{
		for (size_t i = 0; i < sizeof(d) / sizeof(u32); i++)
		{
			if (d[i] < other.d[i])
				return true;
			if (d[i] > other.d[i])
				return false;
		}
		return false;
	}

	// True when all words match.
	bool operator == (const VertexShaderID &other) const
	{
		for (size_t i = 0; i < sizeof(d) / sizeof(u32); i++)
		{
			if (d[i] != other.d[i])
				return false;
		}
		return true;
	}
};
bool CanUseHardwareTransform(int prim);
void ComputeVertexShaderID(VertexShaderID *id, int prim, bool useHWTransform);
void GenerateVertexShader(int prim, char *buffer, bool useHWTransform);
// Collapse to less skinning shaders to reduce shader switching, which is expensive.
int TranslateNumBones(int bones);

View File

@ -0,0 +1,79 @@
#include "dx_state.h"
#include <assert.h>
DirectxState dxstate;
GLExtensions gl_extensions;
int DirectxState::state_count = 0;
// One-time setup: the first call pushes every cached state to the device via Restore();
// later calls do nothing.
void DirectxState::Initialize() {
	if (!initialized) {
		Restore();
		initialized = true;
	}
}
// Re-applies every cached state to the device by calling restore() on each state member.
// `count` tallies the restore() calls so the assert at the end can compare it against
// state_count, the number of state objects that registered themselves at construction.
// Every state type restored here therefore has to bump state_count in its constructor,
// and every state member added to DirectxState needs a line here, or the assert fires.
void DirectxState::Restore() {
int count = 0;
blend.restore(); count++;
blendEquation.restore(); count++;
blendFunc.restore(); count++;
blendColor.restore(); count++;
scissorTest.restore(); count++;
scissorRect.restore(); count++;
// No separate cull-face enable / mode states here; cullMode below covers both.
//cullFace.restore(); count++;
//cullFaceMode.restore(); count++;
cullMode.restore(); count++;
depthTest.restore(); count++;
// depthRange.restore(); count++;
depthFunc.restore(); count++;
depthWrite.restore(); count++;
colorMask.restore(); count++;
viewport.restore(); count++;
stencilTest.restore(); count++;
stencilOp.restore(); count++;
stencilFunc.restore(); count++;
dither.restore(); count++;
assert(count == state_count && "DirectxState::Restore is missing some states");
}
// Fills in gl_extensions the first time it is called; later calls are no-ops.
// The actual OpenGL extension probing is commented out, so every flag simply ends up false.
void CheckGLExtensions() {
	static bool alreadyChecked = false;
	if (!alreadyChecked) {
		alreadyChecked = true;
		memset(&gl_extensions, 0, sizeof(gl_extensions));
		/*
		gl_extensions.OES_packed_depth_stencil = strstr(extString, "GL_OES_packed_depth_stencil") != 0;
		gl_extensions.OES_depth24 = strstr(extString, "GL_OES_depth24") != 0;
		gl_extensions.OES_depth_texture = strstr(extString, "GL_OES_depth_texture") != 0;
		gl_extensions.EXT_discard_framebuffer = strstr(extString, "GL_EXT_discard_framebuffer") != 0;
		#ifdef USING_GLES2
		gl_extensions.FBO_ARB = true;
		gl_extensions.FBO_EXT = false;
		#else
		gl_extensions.FBO_ARB = strstr(extString, "GL_ARB_framebuffer_object") != 0;
		gl_extensions.FBO_EXT = strstr(extString, "GL_EXT_framebuffer_object") != 0;
		#endif
		*/
	}
}
// Currently a no-op: the only implementation is the commented-out WGL (OpenGL on Windows)
// call below, so `interval` is ignored.
void DirectxState::SetVSyncInterval(int interval) {
/*
#ifdef _WIN32
if( wglSwapIntervalEXT )
wglSwapIntervalEXT(interval);
#endif
*/
}

View File

@ -0,0 +1,332 @@
#pragma once
#include <functional>
#include <string.h>
#include "global.h"
// Direct3D 9 render state cache (ported from the OpenGL state cache). Code should go through
// this object instead of calling SetRenderState etc. directly: every state object remembers
// the last value that was set, so the complete state can be re-applied with Restore().
// Note that set() currently always forwards to the device; redundant sets are not filtered.
//
// Bookkeeping: every state class increments DirectxState::state_count in its constructor,
// and Restore() asserts that it restored exactly that many states.
class DirectxState
{
private:
	// A render state that is simply on or off.
	template<D3DRENDERSTATETYPE cap, bool init>
	class BoolState {
		bool _value;
	public:
		BoolState() : _value(init) {
			DirectxState::state_count++;
		}

		inline void set(bool value) {
			_value = value;
			pD3Ddevice->SetRenderState(cap, value);
		}
		inline void enable() {
			set(true);
		}
		inline void disable() {
			set(false);
		}
		operator bool() const {
			return isset();
		}
		// const so that operator bool() above can call it.
		inline bool isset() const {
			return _value;
		}
		void restore() {
			pD3Ddevice->SetRenderState(cap, _value);
		}
	};

	// One render state with one DWORD value.
	template<D3DRENDERSTATETYPE state1, DWORD p1def>
	class DxState1 {
		D3DRENDERSTATETYPE _state1;
		DWORD p1;
	public:
		DxState1() : _state1(state1), p1(p1def) {
			DirectxState::state_count++;
		}

		inline void set(DWORD newp1) {
			p1 = newp1;
			pD3Ddevice->SetRenderState(_state1, p1);
		}
		void restore() {
			pD3Ddevice->SetRenderState(_state1, p1);
		}
	};

	// Two render states that are always set together.
	template<D3DRENDERSTATETYPE state1, DWORD p1def, D3DRENDERSTATETYPE state2, DWORD p2def>
	class DxState2 {
		D3DRENDERSTATETYPE _state1;
		D3DRENDERSTATETYPE _state2;
		DWORD p1;
		DWORD p2;
	public:
		DxState2() : _state1(state1),_state2(state2), p1(p1def), p2(p2def) {
			DirectxState::state_count++;
		}

		inline void set(DWORD newp1, DWORD newp2) {
			p1 = newp1;
			p2 = newp2;
			pD3Ddevice->SetRenderState(_state1, p1);
			pD3Ddevice->SetRenderState(_state2, p2);
		}
		void restore() {
			pD3Ddevice->SetRenderState(_state1, p1);
			pD3Ddevice->SetRenderState(_state2, p2);
		}
	};

	// Three render states that are always set together.
	template<D3DRENDERSTATETYPE state1, DWORD p1def, D3DRENDERSTATETYPE state2, DWORD p2def, D3DRENDERSTATETYPE state3, DWORD p3def>
	class DxState3 {
		D3DRENDERSTATETYPE _state1;
		D3DRENDERSTATETYPE _state2;
		D3DRENDERSTATETYPE _state3;
		DWORD p1;
		DWORD p2;
		DWORD p3;
	public:
		DxState3() : _state1(state1),_state2(state2), _state3(state3),
			p1(p1def), p2(p2def), p3(p3def) {
			DirectxState::state_count++;
		}

		inline void set(DWORD newp1, DWORD newp2, DWORD newp3) {
			p1 = newp1;
			p2 = newp2;
			p3 = newp3;
			pD3Ddevice->SetRenderState(_state1, p1);
			pD3Ddevice->SetRenderState(_state2, p2);
			// The third state gets the third value (this used to pass p2 by mistake, so
			// e.g. the stencil pass op / stencil mask received the wrong value).
			pD3Ddevice->SetRenderState(_state3, p3);
		}
		void restore() {
			pD3Ddevice->SetRenderState(_state1, p1);
			pD3Ddevice->SetRenderState(_state2, p2);
			pD3Ddevice->SetRenderState(_state3, p3);
		}
	};

	// Helper macros generating a cached state around a 4-argument function `func`.
	// Neither is instantiated in this class at the moment.
#define STATE4(func, p1type, p2type, p3type, p4type, p1def, p2def, p3def, p4def) \
	class SavedState4_##func { \
	p1type p1; \
	p2type p2; \
	p3type p3; \
	p4type p4; \
	public: \
	SavedState4_##func() : p1(p1def), p2(p2def), p3(p3def), p4(p4def) { \
	DirectxState::state_count++; \
	}; \
	inline void set(p1type newp1, p2type newp2, p3type newp3, p4type newp4) { \
	p1 = newp1; \
	p2 = newp2; \
	p3 = newp3; \
	p4 = newp4; \
	func(p1, p2, p3, p4); \
	} \
	inline void restore() { \
	func(p1, p2, p3, p4); \
	} \
	}

#define STATEFLOAT4(func, def) \
	class SavedState4_##func { \
	float p[4]; \
	public: \
	SavedState4_##func() { \
	for (int i = 0; i < 4; i++) {p[i] = def;} \
	DirectxState::state_count++; \
	}; \
	inline void set(const float v[4]) { \
	if(memcmp(p,v,sizeof(float)*4)) { \
	memcpy(p,v,sizeof(float)*4); \
	func(p[0], p[1], p[2], p[3]); \
	} \
	} \
	inline void restore() { \
	func(p[0], p[1], p[2], p[3]); \
	} \
	}

	// Blend factor (D3DRS_BLENDFACTOR), given as four floats and stored as a D3DCOLOR.
	class SavedBlendFactor {
		DWORD c;
	public:
		SavedBlendFactor() {
			c = 0xFFFFFFFF;
			DirectxState::state_count++;
		}
		inline void set(const float v[4]) {
			c = D3DCOLOR_COLORVALUE(v[0], v[1], v[2], v[3]);
			pD3Ddevice->SetRenderState(D3DRS_BLENDFACTOR, c);
		}
		inline void restore() {
			pD3Ddevice->SetRenderState(D3DRS_BLENDFACTOR, c);
		}
	};

	// Per-channel color write mask (D3DRS_COLORWRITEENABLE).
	class SavedColorMask {
		DWORD mask;
	public:
		SavedColorMask() {
			mask = D3DCOLORWRITEENABLE_ALL;
			DirectxState::state_count++;
		}

		inline void set(bool r, bool g, bool b, bool a) {
			mask = 0;
			if (r) {
				mask |=D3DCOLORWRITEENABLE_RED;
			}
			if (g) {
				mask |=D3DCOLORWRITEENABLE_GREEN;
			}
			if (b) {
				mask |=D3DCOLORWRITEENABLE_BLUE;
			}
			if (a) {
				mask |=D3DCOLORWRITEENABLE_ALPHA;
			}
			pD3Ddevice->SetRenderState(D3DRS_COLORWRITEENABLE, mask);
		}
		inline void restore() {
			pD3Ddevice->SetRenderState(D3DRS_COLORWRITEENABLE, mask);
		}
	};

	// Placeholder for a boolean state that has no effect here; all operations are no-ops.
	class BoolUnused {
	public:
		BoolUnused() {
			DirectxState::state_count++;
		}
		inline void set(bool) {
		}
		inline void restore() {
		}
		inline void enable() {
			set(true);
		}
		inline void disable() {
			set(false);
		}
	};

	// Viewport rectangle and depth range.
	class StateVp {
		D3DVIEWPORT9 viewport;
	public:
		// Starts with an empty rectangle and the same depth range set() defaults to, and
		// registers the state so Restore()'s count check includes it.
		StateVp() {
			viewport.X = 0;
			viewport.Y = 0;
			viewport.Width = 0;
			viewport.Height = 0;
			viewport.MinZ = 0.f;
			viewport.MaxZ = 1.f;
			DirectxState::state_count++;
		}
		inline void set(int x, int y, int w, int h, float n = 0.f, float f = 1.f) {
			viewport.X=x;
			viewport.Y=y;
			viewport.Width=w;
			viewport.Height=h;
			/*
			if (f > n) {
			viewport.MinZ=n;
			viewport.MaxZ=f;
			} else {
			viewport.MinZ=f;
			viewport.MaxZ=n;
			}
			*/
			viewport.MinZ=n;
			viewport.MaxZ=f;
			pD3Ddevice->SetViewport(&viewport);
		}
		inline void restore() {
			pD3Ddevice->SetViewport(&viewport);
		}
	};

	// Scissor rectangle. Not applied to the device yet (the SetScissorRect call is
	// commented out), so set() and restore() currently have no effect.
	class StateScissor {
	public:
		StateScissor() {
			DirectxState::state_count++;
		}
		inline void set(int x1, int y1, int x2, int y2) {
			RECT rect = {x1, y1, x2, y2};
			//pD3Ddevice->SetScissorRect(&rect);
		}
		inline void restore() {
		}
	};

	// Cull mode (D3DRS_CULLMODE), combining the "culling enabled" flag and the face selection.
	class CullMode {
		DWORD cull;
	public:
		// Starts with culling disabled, the same value set() uses when wantcull is 0.
		CullMode() : cull(D3DCULL_NONE) {
			DirectxState::state_count++;
		}
		inline void set(int wantcull, int cullmode) {
			if (!wantcull) {
				// disable
				cull = D3DCULL_NONE;
			} else {
				// add front face ...
				cull = cullmode==0?D3DCULL_CW:D3DCULL_CCW;
			}

			pD3Ddevice->SetRenderState(D3DRS_CULLMODE, cull);
		}
		inline void restore() {
			pD3Ddevice->SetRenderState(D3DRS_CULLMODE, cull);
		}
	};

	bool initialized;

public:
	// Number of state objects constructed so far (checked by Restore()).
	static int state_count;
	DirectxState() : initialized(false) {}
	void Initialize();
	void Restore();

	// When adding a state here, don't forget to add it to DirectxState::Restore() too
	BoolState<D3DRS_ALPHABLENDENABLE, false> blend;
	DxState2<D3DRS_SRCBLEND, D3DBLEND_SRCALPHA, D3DRS_DESTBLEND, D3DBLEND_INVSRCALPHA> blendFunc;
	DxState1<D3DRS_BLENDOP, D3DBLENDOP_ADD> blendEquation;
	SavedBlendFactor blendColor;

	BoolState<D3DRS_SCISSORTESTENABLE, false> scissorTest;

	BoolUnused dither;

	CullMode cullMode;

	BoolState<D3DRS_ZENABLE, false> depthTest;

	DxState1<D3DRS_ZFUNC, D3DCMP_LESSEQUAL> depthFunc;
	DxState1<D3DRS_ZWRITEENABLE, TRUE> depthWrite;

	SavedColorMask colorMask;

	StateVp viewport;
	StateScissor scissorRect;

	BoolState<D3DRS_STENCILENABLE, false> stencilTest;

	// set(fail, zfail, pass)
	DxState3<D3DRS_STENCILFAIL, D3DSTENCILOP_KEEP, D3DRS_STENCILZFAIL, D3DSTENCILOP_KEEP, D3DRS_STENCILPASS, D3DSTENCILOP_KEEP> stencilOp;
	// set(func, ref, mask)
	DxState3<D3DRS_STENCILFUNC, D3DCMP_ALWAYS, D3DRS_STENCILREF, 0, D3DRS_STENCILMASK, 0xFFFFFFFF> stencilFunc;

	// Only works on Win32, all other platforms are "force-vsync"
	void SetVSyncInterval(int interval); // a swap interval, or a higher number for multi-frame waits (could be useful for 30hz games)
};
#undef STATE1
#undef STATE2
extern DirectxState dxstate;
// OpenGL extension flags, kept from the OpenGL backend this code was ported from.
// In this backend CheckGLExtensions() only zeroes the struct (its probing code is commented
// out), so none of these flags is ever set to true there.
struct GLExtensions {
bool OES_depth24;
bool OES_packed_depth_stencil;
bool OES_depth_texture;
bool EXT_discard_framebuffer;
bool FBO_ARB;
bool FBO_EXT;
};
extern GLExtensions gl_extensions;
void CheckGLExtensions();

107
GPU/Directx9/helper/fbo.cpp Normal file
View File

@ -0,0 +1,107 @@
#include "global.h"
#include <stdint.h>
#include <string.h>
#include "fbo.h"
static LPDIRECT3DSURFACE9 deviceRTsurf;
static LPDIRECT3DSURFACE9 deviceDSsurf;
// Private definition of the opaque FBO handle declared in fbo.h.
struct FBO {
// Render target / depth-stencil surfaces. Their creation in fbo_create() is currently
// commented out, so these are never assigned in this file.
LPDIRECT3DSURFACE9 surf;
LPDIRECT3DSURFACE9 depthstencil;
// Texture created by fbo_create(); returned by fbo_get_rtt() and released by fbo_destroy().
LPDIRECT3DTEXTURE9 tex;
// NOTE(review): color_texture and z_stencil_buffer are never written in this file.
uint32_t color_texture;
uint32_t z_stencil_buffer; // Either this is set, or the two below.
uint32_t z_buffer;         // fbo_create() stores 24 here
uint32_t stencil_buffer;   // fbo_create() stores 8 here
int width;
int height;
FBOColorDepth colorDepth;  // as requested by the caller of fbo_create()
};
// Fetches the device's current render target 0 and depth/stencil surface into
// deviceRTsurf / deviceDSsurf. Must be called after pD3Ddevice has been created.
// The HRESULTs are not checked.
void fbo_init() {
pD3Ddevice->GetRenderTarget(0, &deviceRTsurf);
pD3Ddevice->GetDepthStencilSurface(&deviceDSsurf);
}
FBO * current_fbo = NULL;
// Allocates an FBO record of the given size together with an A8R8G8B8 texture.
// num_color_textures and z_stencil are currently ignored, and the color format is always
// 32-bit regardless of colorDepth (which is only recorded).
// The returned object must be released with fbo_destroy(). If the texture cannot be
// created the failure is reported to the debugger output and the FBO is still returned,
// with a NULL texture.
FBO *fbo_create(int width, int height, int num_color_textures, bool z_stencil, FBOColorDepth colorDepth) {
	FBO *fbo = new FBO();  // value-initialized: all pointers start out NULL
	fbo->width = width;
	fbo->height = height;
	fbo->colorDepth = colorDepth;

	// only support 32bit surfaces
	//pD3Ddevice->CreateRenderTarget(fbo->width, fbo->height, D3DFMT_A8R8G8B8, D3DMULTISAMPLE_NONE, 0, FALSE, &fbo->surf, NULL);
	/*
	// Create depth + stencil target | forced to 24-bit Z, 8-bit stencil
	pD3Ddevice->CreateDepthStencilSurface(fbo->width, fbo->height, D3DFMT_D24S8, D3DMULTISAMPLE_NONE, 0, FALSE, &fbo->depthstencil, NULL);
	*/

	// Only needed on xbox :s
	HRESULT hr = pD3Ddevice->CreateTexture(fbo->width, fbo->height, 1, 0, D3DFMT_A8R8G8B8, 0, &fbo->tex, NULL);
	if (FAILED(hr)) {
		// Don't swallow the error silently; make sure nobody sees a stale pointer.
		OutputDebugStringA("fbo_create: CreateTexture failed\r\n");
		fbo->tex = NULL;
	}

	fbo->stencil_buffer = 8;
	fbo->z_buffer = 24;
	return fbo;
}
// Returns the FBO's texture (an LPDIRECT3DTEXTURE9) as an untyped pointer.
// fbo must not be NULL. No reference is added to the texture.
void * fbo_get_rtt(FBO *fbo) {
return fbo->tex;
}
// Marks no FBO as current. Resolving the previously bound FBO and switching back to the
// device's own render target / depth-stencil surfaces are both disabled for now, so the
// device itself is not touched.
void fbo_unbind() {
	// Disabled: resolve of the FBO that was bound, if any.
	//if (current_fbo != NULL)
	//	pD3Ddevice->Resolve( D3DRESOLVE_RENDERTARGET0, NULL, current_fbo->tex, NULL, 0, 0, NULL, 0.0f, 0, NULL );
	current_fbo = NULL;

	// Disabled: restore of the default surfaces.
	//pD3Ddevice->SetRenderTarget(0, deviceRTsurf);
	//pD3Ddevice->SetDepthStencilSurface(deviceDSsurf);
}
// Resolves render target 0 into the FBO's texture; the flags also request clearing the
// render target and the depth/stencil buffer. fbo must not be NULL.
// NOTE(review): Resolve() is not part of desktop Direct3D 9 - presumably the Xbox 360 API.
void fbo_resolve(FBO *fbo) {
pD3Ddevice->Resolve( D3DRESOLVE_RENDERTARGET0|D3DRESOLVE_ALLFRAGMENTS|D3DRESOLVE_CLEARRENDERTARGET|D3DRESOLVE_CLEARDEPTHSTENCIL, NULL, fbo->tex, NULL, 0, 0, NULL, 0.0f, 0, NULL );
}
// Records fbo as the current FBO. The actual render target / depth-stencil switch is
// commented out, so rendering keeps going to whatever surfaces the device already uses.
void fbo_bind_as_render_target(FBO *fbo) {
current_fbo = fbo;
//pD3Ddevice->SetRenderTarget(0, fbo->surf);
//pD3Ddevice->SetDepthStencilSurface(fbo->depthstencil);
}
// Not implemented: only prints a reminder to the debugger output. fbo is ignored.
void fbo_bind_for_read(FBO *fbo) {
OutputDebugStringA("fbo_bind_for_read: Fix me\r\n");
}
// NOTE(review): despite the name this currently UNBINDS texture stage 0 (sets it to NULL);
// the call that would bind fbo->tex is commented out. Both parameters are ignored.
void fbo_bind_color_as_texture(FBO *fbo, int color) {
//OutputDebugStringA("fbo_bind_color_as_texture: Fix me\r\n");
//pD3Ddevice->SetTexture(0, fbo->tex);
pD3Ddevice->SetTexture(0, NULL);
}
// Releases the FBO's texture and frees the FBO itself.
// Passing NULL is allowed and does nothing. If the FBO is the one currently bound,
// the binding is dropped so current_fbo never points at freed memory.
void fbo_destroy(FBO *fbo) {
	if (fbo == NULL)
		return;

	if (current_fbo == fbo)
		current_fbo = NULL;

	// surf and depthstencil are not created by fbo_create() at the moment, so there is
	// nothing to release for them:
	//fbo->depthstencil->Release();
	//fbo->surf->Release();

	// The texture may be missing if its creation failed.
	if (fbo->tex != NULL)
		fbo->tex->Release();
	delete fbo;
}
// Writes the size the FBO was created with to *w and *h. None of the pointers may be NULL.
void fbo_get_dimensions(FBO *fbo, int *w, int *h) {
*w = fbo->width;
*h = fbo->height;
}
// Presents the back buffer, then immediately clears color (to 0xFFFFFFFF), depth and
// stencil for the next frame.
// NOTE(review): depth is cleared to 0 rather than the usual 1.0 - confirm this is intended.
void SwapBuffer() {
pD3Ddevice->Present(0, 0, 0, 0);
// :s
pD3Ddevice->Clear(0, NULL, D3DCLEAR_STENCIL|D3DCLEAR_TARGET |D3DCLEAR_ZBUFFER, 0xFFFFFFFF, 0, 0);
}

39
GPU/Directx9/helper/fbo.h Normal file
View File

@ -0,0 +1,39 @@
#pragma once
// Simple wrapper around FBO functionality.
// Very C-ish API because that's what I felt like, and it's cool to completely
// hide the data from callers...
struct FBO;
// Requested color format of an FBO; the names give the bits per channel.
// NOTE(review): channel order (RGBA vs. other) is not visible from this header.
enum FBOColorDepth {
FBO_8888, // 32 bits per pixel; the default of fbo_create()
FBO_565,  // 16 bits per pixel
FBO_4444, // 16 bits per pixel
FBO_5551, // 16 bits per pixel
};
// Creates a simple FBO with a RGBA32 color buffer stored in a texture, and
// optionally an accompanying Z/stencil buffer.
// No mipmap support.
// num_color_textures must be 1 for now.
// you lose bound texture state.
// On some hardware, you might get a 24-bit depth buffer even though you only wanted a 16-bit one.
FBO *fbo_create(int width, int height, int num_color_textures, bool z_stencil, FBOColorDepth colorDepth = FBO_8888);
// These functions should be self explanatory.
void fbo_bind_as_render_target(FBO *fbo);
// color must be 0, for now.
void fbo_bind_color_as_texture(FBO *fbo, int color);
void fbo_bind_for_read(FBO *fbo);
void fbo_unbind();
void fbo_destroy(FBO *fbo);
void fbo_get_dimensions(FBO *fbo, int *w, int *h);
void fbo_resolve(FBO *fbo);
void * fbo_get_rtt(FBO *fbo);
// To get default depth and rt surface
void fbo_init();

View File

@ -0,0 +1,229 @@
#include "global.h"
#include "fbo.h"
LPDIRECT3DDEVICE9 pD3Ddevice = NULL;
LPDIRECT3D9 pD3D = NULL;
// HLSL source of the built-in vertex shader compiled by CompileShaders():
// transforms the position by matWVP (constant register c0) and passes the UV through.
static const char * vscode =
" float4x4 matWVP : register(c0); "
" "
" struct VS_IN "
" "
" { "
" float4 ObjPos : POSITION; "
" float2 Uv : TEXCOORD0; " // Texture coordinate
" }; "
" "
" struct VS_OUT "
" { "
" float4 ProjPos : POSITION; "
" float2 Uv : TEXCOORD0; " // Texture coordinate
" }; "
" "
" VS_OUT main( VS_IN In ) "
" { "
" VS_OUT Out; "
" Out.ProjPos = mul( matWVP, In.ObjPos ); " // Transform the position by matWVP
" Out.Uv = In.Uv; "
" return Out; "
" } ";
//--------------------------------------------------------------------------------------
// Pixel shader
//--------------------------------------------------------------------------------------
// HLSL source of the built-in pixel shader compiled by CompileShaders():
// samples the texture bound to sampler s0 at the interpolated UV and forces alpha to 1.
static const char * pscode =
" sampler s: register(s0); "
" struct PS_IN "
" { "
" float2 Uv : TEXCOORD0; "
" }; "
" "
" float4 main( PS_IN In ) : COLOR "
" { "
" float4 c = tex2D(s, In.Uv) ; "
" c.a = 1.0f;"
" return c; "
" } ";
// Vertex declaration created from VertexElements in CompileShaders().
IDirect3DVertexDeclaration9* pFramebufferVertexDecl = NULL;
// Layout, all in stream 0: float3 position at byte 0, float2 texcoord at byte 12
// (20 bytes of attribute data per vertex).
static const D3DVERTEXELEMENT9 VertexElements[] =
{
{ 0, 0, D3DDECLTYPE_FLOAT3, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_POSITION, 0 },
{ 0, 12, D3DDECLTYPE_FLOAT2, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 0 },
D3DDECL_END()
};
// Vertex declaration created from SoftTransVertexElements in CompileShaders().
IDirect3DVertexDeclaration9* pSoftVertexDecl = NULL;
// Layout, all in stream 0: float4 position at byte 0, float3 texcoord at byte 16,
// D3DCOLOR color 0 at byte 28 and D3DCOLOR color 1 at byte 32 (36 bytes per vertex).
static const D3DVERTEXELEMENT9 SoftTransVertexElements[] =
{
{ 0, 0, D3DDECLTYPE_FLOAT4, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_POSITION, 0 },
{ 0, 16, D3DDECLTYPE_FLOAT3, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 0 },
{ 0, 28, D3DDECLTYPE_D3DCOLOR, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_COLOR, 0 },
{ 0, 32, D3DDECLTYPE_D3DCOLOR, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_COLOR, 1 },
D3DDECL_END()
};
LPDIRECT3DVERTEXSHADER9 pFramebufferVertexShader = NULL; // Vertex Shader
LPDIRECT3DPIXELSHADER9 pFramebufferPixelShader = NULL; // Pixel Shader
// Compiles HLSL source `code` (entry point "main", profile ps_3_0) and creates a pixel
// shader from it.
//   code         - NUL terminated HLSL source.
//   pShader      - receives the created shader on success.
//   pShaderTable - passed straight to D3DXCompileShader to receive the constant table.
// Returns true only if both compilation and shader creation succeeded. On a compile error
// the compiler message (if any) is sent to the debugger output and DebugBreak() is called.
bool CompilePixelShader(const char * code, LPDIRECT3DPIXELSHADER9 * pShader, LPD3DXCONSTANTTABLE * pShaderTable) {
	// Start from NULL so we can tell which buffers the compiler actually returned.
	ID3DXBuffer* pShaderCode = NULL;
	ID3DXBuffer* pErrorMsg = NULL;

	// Compile pixel shader.
	HRESULT hr = D3DXCompileShader( code,
		(UINT)strlen( code ),
		NULL,
		NULL,
		"main",
		"ps_3_0",
		0,
		&pShaderCode,
		&pErrorMsg,
		pShaderTable );

	if( FAILED(hr) )
	{
		// The error buffer is not guaranteed to exist on failure.
		if( pErrorMsg ) {
			OutputDebugStringA((CHAR*)pErrorMsg->GetBufferPointer());
			pErrorMsg->Release();
		}
		if( pShaderCode )
			pShaderCode->Release();
		DebugBreak();
		return false;
	}

	// A successful compile may still have produced warnings; don't leak that buffer.
	if( pErrorMsg )
		pErrorMsg->Release();

	// Create pixel shader.
	hr = pD3Ddevice->CreatePixelShader( (DWORD*)pShaderCode->GetBufferPointer(),
		pShader );

	pShaderCode->Release();

	return !FAILED(hr);
}
// Compiles HLSL source `code` (entry point "main", profile vs_3_0) and creates a vertex
// shader from it.
//   code         - NUL terminated HLSL source.
//   pShader      - receives the created shader on success.
//   pShaderTable - passed straight to D3DXCompileShader to receive the constant table.
// Returns true only if both compilation and shader creation succeeded. On a compile error
// the compiler message (if any) is sent to the debugger output and DebugBreak() is called.
bool CompileVertexShader(const char * code, LPDIRECT3DVERTEXSHADER9 * pShader, LPD3DXCONSTANTTABLE * pShaderTable) {
	// Start from NULL so we can tell which buffers the compiler actually returned.
	ID3DXBuffer* pShaderCode = NULL;
	ID3DXBuffer* pErrorMsg = NULL;

	// Compile vertex shader.
	HRESULT hr = D3DXCompileShader( code,
		(UINT)strlen( code ),
		NULL,
		NULL,
		"main",
		"vs_3_0",
		0,
		&pShaderCode,
		&pErrorMsg,
		pShaderTable );

	if( FAILED(hr) )
	{
		// The error buffer is not guaranteed to exist on failure.
		if( pErrorMsg ) {
			OutputDebugStringA((CHAR*)pErrorMsg->GetBufferPointer());
			pErrorMsg->Release();
		}
		if( pShaderCode )
			pShaderCode->Release();
		DebugBreak();
		return false;
	}

	// A successful compile may still have produced warnings; don't leak that buffer.
	if( pErrorMsg )
		pErrorMsg->Release();

	// Create vertex shader.
	hr = pD3Ddevice->CreateVertexShader( (DWORD*)pShaderCode->GetBufferPointer(),
		pShader );

	pShaderCode->Release();

	return !FAILED(hr);
}
void CompileShaders() {
ID3DXBuffer* pShaderCode;
ID3DXBuffer* pErrorMsg;
HRESULT hr;
// Compile vertex shader.
hr = D3DXCompileShader( vscode,
(UINT)strlen( vscode ),
NULL,
NULL,
"main",
"vs_2_0",
0,
&pShaderCode,
&pErrorMsg,
NULL );
if( FAILED(hr) )
{
OutputDebugStringA((CHAR*)pErrorMsg->GetBufferPointer());
DebugBreak();
}
// Create pixel shader.
pD3Ddevice->CreateVertexShader( (DWORD*)pShaderCode->GetBufferPointer(),
&pFramebufferVertexShader );
pShaderCode->Release();
// Compile pixel shader.
hr = D3DXCompileShader( pscode,
(UINT)strlen( pscode ),
NULL,
NULL,
"main",
"ps_2_0",
0,
&pShaderCode,
&pErrorMsg,
NULL );
if( FAILED(hr) )
{
OutputDebugStringA((CHAR*)pErrorMsg->GetBufferPointer());
DebugBreak();
}
// Create pixel shader.
pD3Ddevice->CreatePixelShader( (DWORD*)pShaderCode->GetBufferPointer(),
&pFramebufferPixelShader );
pShaderCode->Release();
pD3Ddevice->CreateVertexDeclaration( VertexElements, &pFramebufferVertexDecl );
pD3Ddevice->SetVertexDeclaration( pFramebufferVertexDecl );
pD3Ddevice->CreateVertexDeclaration( SoftTransVertexElements, &pSoftVertexDecl );
}
void DirectxInit() {
pD3D = Direct3DCreate9( D3D_SDK_VERSION );
// Set up the structure used to create the D3DDevice. Most parameters are
// zeroed out. We set Windowed to TRUE, since we want to do D3D in a
// window, and then set the SwapEffect to "discard", which is the most
// efficient method of presenting the back buffer to the display. And
// we request a back buffer format that matches the current desktop display
// format.
D3DPRESENT_PARAMETERS d3dpp;
ZeroMemory( &d3dpp, sizeof( d3dpp ) );
d3dpp.BackBufferWidth = 1280;
d3dpp.BackBufferHeight = 720;
d3dpp.BackBufferFormat = ( D3DFORMAT )( D3DFMT_A8R8G8B8 );
d3dpp.FrontBufferFormat = ( D3DFORMAT )( D3DFMT_LE_A8R8G8B8 );
d3dpp.MultiSampleType = D3DMULTISAMPLE_NONE;
d3dpp.MultiSampleQuality = 0;
d3dpp.BackBufferCount = 1;
d3dpp.EnableAutoDepthStencil = TRUE;
d3dpp.AutoDepthStencilFormat = D3DFMT_D24S8;
d3dpp.SwapEffect = D3DSWAPEFFECT_DISCARD;
d3dpp.PresentationInterval = D3DPRESENT_INTERVAL_IMMEDIATE;
pD3D->CreateDevice( D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, NULL,
D3DCREATE_HARDWARE_VERTEXPROCESSING,
&d3dpp, &pD3Ddevice);
CompileShaders();
fbo_init();
}

View File

@ -0,0 +1,21 @@
#pragma once
#include <xtl.h>
#include <d3d9.h>
#include <d3dx9.h>
// Used on Xbox to create a linear format: wraps x in MAKELINFMT and casts the
// result to D3DFORMAT.
#define D3DFMT(x) (D3DFORMAT)MAKELINFMT(x)
// Direct3D device shared by the DirectX 9 backend.
extern LPDIRECT3DDEVICE9 pD3Ddevice;
// Framebuffer shader pair and the two vertex declarations.
// NOTE(review): presumably all four are set up during initialisation by the
// file that defines them - confirm against that file.
extern LPDIRECT3DVERTEXSHADER9 pFramebufferVertexShader; // Vertex Shader
extern LPDIRECT3DPIXELSHADER9 pFramebufferPixelShader; // Pixel Shader
extern IDirect3DVertexDeclaration9* pFramebufferVertexDecl;
extern IDirect3DVertexDeclaration9* pSoftVertexDecl;
// Shader helpers: take HLSL source text, a pointer that receives the shader
// object and a pointer that receives the constant table; the bool result
// reports success.
bool CompilePixelShader(const char * code, LPDIRECT3DPIXELSHADER9 * pShader, LPD3DXCONSTANTTABLE * pShaderTable);
bool CompileVertexShader(const char * code, LPDIRECT3DVERTEXSHADER9 * pShader, LPD3DXCONSTANTTABLE * pShaderTable);
// NOTE(review): reuses the stencil-op FORCE_DWORD sentinel as a blend value;
// presumably a placeholder for blend modes with no D3D equivalent - confirm.
#define D3DBLEND_UNK D3DSTENCILOP_FORCE_DWORD

464
GPU/GPUXbox.vcxproj Normal file
View File

@ -0,0 +1,464 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug_Optimised|Win32">
<Configuration>Debug_Optimised</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug_Optimised|x64">
<Configuration>Debug_Optimised</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug_Optimised|Xbox 360">
<Configuration>Debug_Optimised</Configuration>
<Platform>Xbox 360</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|Xbox 360">
<Configuration>Debug</Configuration>
<Platform>Xbox 360</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Xbox 360">
<Configuration>Release</Configuration>
<Platform>Xbox 360</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{DCC4F772-A6E5-4F54-9ACA-BD090CC971C5}</ProjectGuid>
<RootNamespace>GPU</RootNamespace>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<CharacterSet>MultiByte</CharacterSet>
<WholeProgramOptimization>false</WholeProgramOptimization>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug_Optimised|Win32'" Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<CharacterSet>MultiByte</CharacterSet>
<WholeProgramOptimization>false</WholeProgramOptimization>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Xbox 360'" Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<CharacterSet>MultiByte</CharacterSet>
<WholeProgramOptimization>false</WholeProgramOptimization>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug_Optimised|Xbox 360'" Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<CharacterSet>MultiByte</CharacterSet>
<WholeProgramOptimization>false</WholeProgramOptimization>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<CharacterSet>MultiByte</CharacterSet>
<WholeProgramOptimization>false</WholeProgramOptimization>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug_Optimised|x64'" Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<CharacterSet>MultiByte</CharacterSet>
<WholeProgramOptimization>false</WholeProgramOptimization>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<WholeProgramOptimization>false</WholeProgramOptimization>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Xbox 360'" Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<WholeProgramOptimization>false</WholeProgramOptimization>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<WholeProgramOptimization>false</WholeProgramOptimization>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug_Optimised|Win32'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Xbox 360'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug_Optimised|Xbox 360'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug_Optimised|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Xbox 360'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Xbox 360'">
<OutputFile>$(OutDir)GPU$(TargetExt)</OutputFile>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<AdditionalIncludeDirectories>../common;..;../native;../native/ext/glew;</AdditionalIncludeDirectories>
<PreprocessorDefinitions>_MBCS;%(PreprocessorDefinitions);_CRT_SECURE_NO_WARNINGS</PreprocessorDefinitions>
<EnableEnhancedInstructionSet>StreamingSIMDExtensions2</EnableEnhancedInstructionSet>
<FloatingPointModel>Fast</FloatingPointModel>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<MinimalRebuild>false</MinimalRebuild>
<RuntimeTypeInfo>false</RuntimeTypeInfo>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug_Optimised|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<AdditionalIncludeDirectories>../common;..;../native;../native/ext/glew;</AdditionalIncludeDirectories>
<PreprocessorDefinitions>_MBCS;%(PreprocessorDefinitions);_CRT_SECURE_NO_WARNINGS</PreprocessorDefinitions>
<EnableEnhancedInstructionSet>StreamingSIMDExtensions2</EnableEnhancedInstructionSet>
<FloatingPointModel>Fast</FloatingPointModel>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<MinimalRebuild>false</MinimalRebuild>
<RuntimeTypeInfo>false</RuntimeTypeInfo>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Xbox 360'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<AdditionalIncludeDirectories>../common;..;../native;../native/ext/glew;</AdditionalIncludeDirectories>
<PreprocessorDefinitions>USE_DIRECTX;BIG_ENDIAN;PPC;_XBOX;_MBCS;%(PreprocessorDefinitions);_CRT_SECURE_NO_WARNINGS</PreprocessorDefinitions>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<MinimalRebuild>false</MinimalRebuild>
<RuntimeTypeInfo>false</RuntimeTypeInfo>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
<ForcedIncludeFiles>core/x360_compat.h</ForcedIncludeFiles>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug_Optimised|Xbox 360'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>Full</Optimization>
<AdditionalIncludeDirectories>../common;..;../native;../native/ext/glew;</AdditionalIncludeDirectories>
<PreprocessorDefinitions>USE_DIRECTX;BIG_ENDIAN;PPC;_XBOX;_MBCS;%(PreprocessorDefinitions);_CRT_SECURE_NO_WARNINGS</PreprocessorDefinitions>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<MinimalRebuild>false</MinimalRebuild>
<RuntimeTypeInfo>false</RuntimeTypeInfo>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
<ForcedIncludeFiles>core/x360_compat.h</ForcedIncludeFiles>
<BasicRuntimeChecks>Default</BasicRuntimeChecks>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<AdditionalIncludeDirectories>../common;..;../native;../native/ext/glew;</AdditionalIncludeDirectories>
<EnableEnhancedInstructionSet>StreamingSIMDExtensions2</EnableEnhancedInstructionSet>
<FloatingPointModel>Fast</FloatingPointModel>
<OmitFramePointers>false</OmitFramePointers>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<MinimalRebuild>false</MinimalRebuild>
<RuntimeTypeInfo>false</RuntimeTypeInfo>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug_Optimised|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<AdditionalIncludeDirectories>../common;..;../native;../native/ext/glew;</AdditionalIncludeDirectories>
<EnableEnhancedInstructionSet>StreamingSIMDExtensions2</EnableEnhancedInstructionSet>
<FloatingPointModel>Fast</FloatingPointModel>
<OmitFramePointers>false</OmitFramePointers>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<MinimalRebuild>false</MinimalRebuild>
<RuntimeTypeInfo>false</RuntimeTypeInfo>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<AdditionalIncludeDirectories>../common;..;../native;../native/ext/glew;</AdditionalIncludeDirectories>
<BufferSecurityCheck>false</BufferSecurityCheck>
<EnableEnhancedInstructionSet>StreamingSIMDExtensions2</EnableEnhancedInstructionSet>
<FloatingPointModel>Fast</FloatingPointModel>
<PreprocessorDefinitions>_MBCS;%(PreprocessorDefinitions);_CRT_SECURE_NO_WARNINGS</PreprocessorDefinitions>
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<RuntimeTypeInfo>false</RuntimeTypeInfo>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Xbox 360'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>Full</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<AdditionalIncludeDirectories>../common;..;../native;../native/ext/glew;</AdditionalIncludeDirectories>
<BufferSecurityCheck>false</BufferSecurityCheck>
<FloatingPointModel>Fast</FloatingPointModel>
<PreprocessorDefinitions>USE_DIRECTX;WIN32;_XBOX;PPC;BIG_ENDIAN;NO_JIT;;%(PreprocessorDefinitions);_CRT_SECURE_NO_WARNINGS</PreprocessorDefinitions>
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<RuntimeTypeInfo>false</RuntimeTypeInfo>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<ForcedIncludeFiles>core/x360_compat.h</ForcedIncludeFiles>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<AdditionalIncludeDirectories>../common;..;../native;../native/ext/glew;</AdditionalIncludeDirectories>
<BufferSecurityCheck>false</BufferSecurityCheck>
<EnableEnhancedInstructionSet>StreamingSIMDExtensions2</EnableEnhancedInstructionSet>
<FloatingPointModel>Fast</FloatingPointModel>
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
<OmitFramePointers>false</OmitFramePointers>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<RuntimeTypeInfo>false</RuntimeTypeInfo>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
</Link>
</ItemDefinitionGroup>
<ItemGroup>
<ClInclude Include="..\ext\xbrz\xbrz.h" />
<ClInclude Include="Directx9\DisplayListInterpreter.h" />
<ClInclude Include="Directx9\FragmentShaderGenerator.h" />
<ClInclude Include="Directx9\Framebuffer.h" />
<ClInclude Include="Directx9\helper\dx_state.h" />
<ClInclude Include="Directx9\helper\fbo.h" />
<ClInclude Include="Directx9\helper\global.h" />
<ClInclude Include="Directx9\IndexGenerator.h" />
<ClInclude Include="Directx9\ShaderManager.h" />
<ClInclude Include="Directx9\StateMapping.h" />
<ClInclude Include="Directx9\TextureCache.h" />
<ClInclude Include="Directx9\TextureScaler.h" />
<ClInclude Include="Directx9\TransformPipeline.h" />
<ClInclude Include="Directx9\VertexDecoder.h" />
<ClInclude Include="Directx9\VertexShaderGenerator.h" />
<ClInclude Include="ge_constants.h" />
<ClInclude Include="GLES\DisplayListInterpreter.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Xbox 360'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_Optimised|Xbox 360'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Xbox 360'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="GLES\FragmentShaderGenerator.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Xbox 360'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_Optimised|Xbox 360'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Xbox 360'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="GLES\Framebuffer.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Xbox 360'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_Optimised|Xbox 360'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Xbox 360'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="GLES\IndexGenerator.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Xbox 360'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_Optimised|Xbox 360'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Xbox 360'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="GLES\ShaderManager.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Xbox 360'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_Optimised|Xbox 360'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Xbox 360'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="GLES\StateMapping.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Xbox 360'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_Optimised|Xbox 360'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Xbox 360'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="GLES\TextureCache.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Xbox 360'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_Optimised|Xbox 360'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Xbox 360'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="GLES\TextureScaler.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Xbox 360'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_Optimised|Xbox 360'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Xbox 360'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="GLES\TransformPipeline.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Xbox 360'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_Optimised|Xbox 360'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Xbox 360'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="GLES\VertexDecoder.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Xbox 360'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_Optimised|Xbox 360'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Xbox 360'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="GLES\VertexShaderGenerator.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Xbox 360'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_Optimised|Xbox 360'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Xbox 360'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="GeDisasm.h" />
<ClInclude Include="GPUCommon.h" />
<ClInclude Include="GPUInterface.h" />
<ClInclude Include="GPUState.h" />
<ClInclude Include="Math3D.h" />
<ClInclude Include="Null\NullGpu.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\ext\xbrz\xbrz.cpp" />
<ClCompile Include="Directx9\DisplayListInterpreter.cpp" />
<ClCompile Include="Directx9\FragmentShaderGenerator.cpp" />
<ClCompile Include="Directx9\Framebuffer.cpp" />
<ClCompile Include="Directx9\helper\dx_state.cpp" />
<ClCompile Include="Directx9\helper\fbo.cpp" />
<ClCompile Include="Directx9\helper\global.cpp" />
<ClCompile Include="Directx9\IndexGenerator.cpp" />
<ClCompile Include="Directx9\ShaderManager.cpp" />
<ClCompile Include="Directx9\StateMapping.cpp" />
<ClCompile Include="Directx9\TextureCache.cpp" />
<ClCompile Include="Directx9\TextureScaler.cpp" />
<ClCompile Include="Directx9\TransformPipeline.cpp" />
<ClCompile Include="Directx9\VertexDecoder.cpp" />
<ClCompile Include="Directx9\VertexShaderGenerator.cpp" />
<ClCompile Include="GLES\DisplayListInterpreter.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Xbox 360'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_Optimised|Xbox 360'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Xbox 360'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="GLES\FragmentShaderGenerator.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Xbox 360'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_Optimised|Xbox 360'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Xbox 360'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="GLES\Framebuffer.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Xbox 360'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_Optimised|Xbox 360'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Xbox 360'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="GLES\IndexGenerator.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Xbox 360'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_Optimised|Xbox 360'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Xbox 360'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="GLES\ShaderManager.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Xbox 360'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_Optimised|Xbox 360'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Xbox 360'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="GLES\StateMapping.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Xbox 360'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_Optimised|Xbox 360'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Xbox 360'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="GLES\TextureCache.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Xbox 360'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_Optimised|Xbox 360'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Xbox 360'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="GLES\TextureScaler.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Xbox 360'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_Optimised|Xbox 360'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Xbox 360'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="GLES\TransformPipeline.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Xbox 360'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_Optimised|Xbox 360'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Xbox 360'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="GLES\VertexDecoder.cpp">
<AssemblerOutput Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AssemblyAndSourceCode</AssemblerOutput>
<AssemblerOutput Condition="'$(Configuration)|$(Platform)'=='Release|Xbox 360'">AssemblyAndSourceCode</AssemblerOutput>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Xbox 360'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_Optimised|Xbox 360'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Xbox 360'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="GLES\VertexShaderGenerator.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Xbox 360'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_Optimised|Xbox 360'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Xbox 360'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="GeDisasm.cpp" />
<ClCompile Include="GPUCommon.cpp" />
<ClCompile Include="GPUState.cpp" />
<ClCompile Include="Math3D.cpp" />
<ClCompile Include="Null\NullGpu.cpp" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\Common\Common.vcxproj">
<Project>{3fcdbae2-5103-4350-9a8e-848ce9c73195}</Project>
</ProjectReference>
</ItemGroup>
<ItemGroup>
<None Include="CMakeLists.txt" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>

214
GPU/GPUXbox.vcxproj.filters Normal file
View File

@ -0,0 +1,214 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<!-- Filter definitions: GLES, Software, Common, Null, Directx9 and the nested Directx9\helper. Each filter carries its own GUID in UniqueIdentifier. -->
<!-- NOTE(review): no item in this file is assigned to the "Software" filter; it may be a leftover from the project this file was derived from. -->
<ItemGroup>
<Filter Include="GLES">
<UniqueIdentifier>{f7563dba-8146-4c21-a092-e864ff145d79}</UniqueIdentifier>
</Filter>
<Filter Include="Software">
<UniqueIdentifier>{4f6d1284-2c23-4ebc-842c-666a1305bfed}</UniqueIdentifier>
</Filter>
<Filter Include="Common">
<UniqueIdentifier>{21783292-4dd7-447b-af93-356cd2eaa4d6}</UniqueIdentifier>
</Filter>
<Filter Include="Null">
<UniqueIdentifier>{b31aa5a1-da08-47e6-9467-ab1d547b6ff3}</UniqueIdentifier>
</Filter>
<Filter Include="Directx9">
<UniqueIdentifier>{eb2a1d3d-24c7-4df8-b3cb-79a4b9734d70}</UniqueIdentifier>
</Filter>
<Filter Include="Directx9\helper">
<UniqueIdentifier>{862f23b4-2c1b-4d16-9450-caecbb77f276}</UniqueIdentifier>
</Filter>
</ItemGroup>
<!-- Header files (ClInclude) and the filter each one is assigned to. Paths are relative to the project directory. -->
<!-- GeDisasm.h and ..\ext\xbrz\xbrz.h have no Filter child, so they are not assigned to any of the filters defined in this file. -->
<ItemGroup>
<ClInclude Include="ge_constants.h">
<Filter>Common</Filter>
</ClInclude>
<ClInclude Include="Math3D.h">
<Filter>Common</Filter>
</ClInclude>
<ClInclude Include="GLES\DisplayListInterpreter.h">
<Filter>GLES</Filter>
</ClInclude>
<ClInclude Include="GLES\FragmentShaderGenerator.h">
<Filter>GLES</Filter>
</ClInclude>
<ClInclude Include="GLES\Framebuffer.h">
<Filter>GLES</Filter>
</ClInclude>
<ClInclude Include="GLES\ShaderManager.h">
<Filter>GLES</Filter>
</ClInclude>
<ClInclude Include="GLES\TextureCache.h">
<Filter>GLES</Filter>
</ClInclude>
<ClInclude Include="GLES\TransformPipeline.h">
<Filter>GLES</Filter>
</ClInclude>
<ClInclude Include="GLES\VertexDecoder.h">
<Filter>GLES</Filter>
</ClInclude>
<ClInclude Include="GLES\VertexShaderGenerator.h">
<Filter>GLES</Filter>
</ClInclude>
<ClInclude Include="GPUState.h">
<Filter>Common</Filter>
</ClInclude>
<ClInclude Include="GPUInterface.h">
<Filter>Common</Filter>
</ClInclude>
<ClInclude Include="Null\NullGpu.h">
<Filter>Null</Filter>
</ClInclude>
<ClInclude Include="GLES\StateMapping.h">
<Filter>GLES</Filter>
</ClInclude>
<ClInclude Include="GLES\IndexGenerator.h">
<Filter>GLES</Filter>
</ClInclude>
<!-- No filter assigned. -->
<ClInclude Include="GeDisasm.h" />
<ClInclude Include="GPUCommon.h">
<Filter>Common</Filter>
</ClInclude>
<!-- No filter assigned; this header lives outside the project directory (..\ext\xbrz). -->
<ClInclude Include="..\ext\xbrz\xbrz.h" />
<ClInclude Include="GLES\TextureScaler.h">
<Filter>GLES</Filter>
</ClInclude>
<!-- Directx9 headers. -->
<ClInclude Include="Directx9\DisplayListInterpreter.h">
<Filter>Directx9</Filter>
</ClInclude>
<ClInclude Include="Directx9\FragmentShaderGenerator.h">
<Filter>Directx9</Filter>
</ClInclude>
<ClInclude Include="Directx9\Framebuffer.h">
<Filter>Directx9</Filter>
</ClInclude>
<ClInclude Include="Directx9\IndexGenerator.h">
<Filter>Directx9</Filter>
</ClInclude>
<ClInclude Include="Directx9\ShaderManager.h">
<Filter>Directx9</Filter>
</ClInclude>
<ClInclude Include="Directx9\StateMapping.h">
<Filter>Directx9</Filter>
</ClInclude>
<ClInclude Include="Directx9\TextureCache.h">
<Filter>Directx9</Filter>
</ClInclude>
<ClInclude Include="Directx9\TextureScaler.h">
<Filter>Directx9</Filter>
</ClInclude>
<ClInclude Include="Directx9\TransformPipeline.h">
<Filter>Directx9</Filter>
</ClInclude>
<ClInclude Include="Directx9\VertexDecoder.h">
<Filter>Directx9</Filter>
</ClInclude>
<ClInclude Include="Directx9\VertexShaderGenerator.h">
<Filter>Directx9</Filter>
</ClInclude>
<!-- Directx9 helper headers, placed in the nested Directx9\helper filter. -->
<ClInclude Include="Directx9\helper\fbo.h">
<Filter>Directx9\helper</Filter>
</ClInclude>
<ClInclude Include="Directx9\helper\global.h">
<Filter>Directx9\helper</Filter>
</ClInclude>
<ClInclude Include="Directx9\helper\dx_state.h">
<Filter>Directx9\helper</Filter>
</ClInclude>
</ItemGroup>
<!-- Source files (ClCompile) and the filter each one is assigned to. Paths are relative to the project directory. -->
<!-- GeDisasm.cpp and ..\ext\xbrz\xbrz.cpp have no Filter child, so they are not assigned to any of the filters defined in this file. -->
<ItemGroup>
<ClCompile Include="Math3D.cpp">
<Filter>Common</Filter>
</ClCompile>
<ClCompile Include="GLES\DisplayListInterpreter.cpp">
<Filter>GLES</Filter>
</ClCompile>
<ClCompile Include="GLES\FragmentShaderGenerator.cpp">
<Filter>GLES</Filter>
</ClCompile>
<ClCompile Include="GLES\Framebuffer.cpp">
<Filter>GLES</Filter>
</ClCompile>
<ClCompile Include="GLES\ShaderManager.cpp">
<Filter>GLES</Filter>
</ClCompile>
<ClCompile Include="GLES\TextureCache.cpp">
<Filter>GLES</Filter>
</ClCompile>
<ClCompile Include="GLES\TransformPipeline.cpp">
<Filter>GLES</Filter>
</ClCompile>
<ClCompile Include="GLES\VertexDecoder.cpp">
<Filter>GLES</Filter>
</ClCompile>
<ClCompile Include="GLES\VertexShaderGenerator.cpp">
<Filter>GLES</Filter>
</ClCompile>
<ClCompile Include="GPUState.cpp">
<Filter>Common</Filter>
</ClCompile>
<ClCompile Include="Null\NullGpu.cpp">
<Filter>Null</Filter>
</ClCompile>
<ClCompile Include="GLES\StateMapping.cpp">
<Filter>GLES</Filter>
</ClCompile>
<ClCompile Include="GLES\IndexGenerator.cpp">
<Filter>GLES</Filter>
</ClCompile>
<!-- No filter assigned. -->
<ClCompile Include="GeDisasm.cpp" />
<ClCompile Include="GPUCommon.cpp">
<Filter>Common</Filter>
</ClCompile>
<!-- No filter assigned; this source lives outside the project directory (..\ext\xbrz). -->
<ClCompile Include="..\ext\xbrz\xbrz.cpp" />
<ClCompile Include="GLES\TextureScaler.cpp">
<Filter>GLES</Filter>
</ClCompile>
<!-- Directx9 sources. -->
<ClCompile Include="Directx9\DisplayListInterpreter.cpp">
<Filter>Directx9</Filter>
</ClCompile>
<ClCompile Include="Directx9\FragmentShaderGenerator.cpp">
<Filter>Directx9</Filter>
</ClCompile>
<ClCompile Include="Directx9\Framebuffer.cpp">
<Filter>Directx9</Filter>
</ClCompile>
<ClCompile Include="Directx9\IndexGenerator.cpp">
<Filter>Directx9</Filter>
</ClCompile>
<ClCompile Include="Directx9\ShaderManager.cpp">
<Filter>Directx9</Filter>
</ClCompile>
<ClCompile Include="Directx9\StateMapping.cpp">
<Filter>Directx9</Filter>
</ClCompile>
<ClCompile Include="Directx9\TextureCache.cpp">
<Filter>Directx9</Filter>
</ClCompile>
<ClCompile Include="Directx9\TextureScaler.cpp">
<Filter>Directx9</Filter>
</ClCompile>
<ClCompile Include="Directx9\TransformPipeline.cpp">
<Filter>Directx9</Filter>
</ClCompile>
<ClCompile Include="Directx9\VertexDecoder.cpp">
<Filter>Directx9</Filter>
</ClCompile>
<ClCompile Include="Directx9\VertexShaderGenerator.cpp">
<Filter>Directx9</Filter>
</ClCompile>
<!-- Directx9 helper sources, placed in the nested Directx9\helper filter. -->
<ClCompile Include="Directx9\helper\fbo.cpp">
<Filter>Directx9\helper</Filter>
</ClCompile>
<ClCompile Include="Directx9\helper\dx_state.cpp">
<Filter>Directx9\helper</Filter>
</ClCompile>
<ClCompile Include="Directx9\helper\global.cpp">
<Filter>Directx9\helper</Filter>
</ClCompile>
</ItemGroup>
<!-- CMakeLists.txt is listed as a "None" item with no Filter child, so it is not assigned to any filter defined in this file. -->
<ItemGroup>
<None Include="CMakeLists.txt" />
</ItemGroup>
</Project>