mirror of
https://github.com/hrydgard/ppsspp.git
synced 2024-11-23 13:30:02 +00:00
Start stubbing out a new D3D11 backend
This commit is contained in:
parent
9dd3e18ed4
commit
175b97ef34
@ -1,5 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <cstdint>
|
||||
#include "base/basictypes.h"
|
||||
|
||||
// TODO: There will be additional bits, indicating that groups of these will be
|
||||
@ -93,9 +95,9 @@ struct ShaderID {
|
||||
}
|
||||
}
|
||||
|
||||
u32 d[2];
|
||||
uint32_t d[2];
|
||||
bool operator < (const ShaderID &other) const {
|
||||
for (size_t i = 0; i < sizeof(d) / sizeof(u32); i++) {
|
||||
for (size_t i = 0; i < sizeof(d) / sizeof(uint32_t); i++) {
|
||||
if (d[i] < other.d[i])
|
||||
return true;
|
||||
if (d[i] > other.d[i])
|
||||
@ -104,7 +106,7 @@ struct ShaderID {
|
||||
return false;
|
||||
}
|
||||
bool operator == (const ShaderID &other) const {
|
||||
for (size_t i = 0; i < sizeof(d) / sizeof(u32); i++) {
|
||||
for (size_t i = 0; i < sizeof(d) / sizeof(uint32_t); i++) {
|
||||
if (d[i] != other.d[i])
|
||||
return false;
|
||||
}
|
||||
@ -142,7 +144,7 @@ struct ShaderID {
|
||||
|
||||
|
||||
bool CanUseHardwareTransform(int prim);
|
||||
void ComputeVertexShaderID(ShaderID *id, u32 vertexType, bool useHWTransform);
|
||||
void ComputeVertexShaderID(ShaderID *id, uint32_t vertexType, bool useHWTransform);
|
||||
// Generates a compact string that describes the shader. Useful in a list to get an overview
|
||||
// of the current flora of shaders.
|
||||
std::string VertexShaderDesc(const ShaderID &id);
|
||||
|
215
GPU/Common/ShaderUniforms.cpp
Normal file
215
GPU/Common/ShaderUniforms.cpp
Normal file
@ -0,0 +1,215 @@
|
||||
#include "ShaderUniforms.h"
|
||||
#include "math/dataconv.h"
|
||||
#include "math/lin/matrix4x4.h"
|
||||
#include "math/math_util.h"
|
||||
#include "math/lin/vec3.h"
|
||||
#include "GPU/GPUState.h"
|
||||
#include "GPU/Math3D.h"
|
||||
#include "Core/Reporting.h"
|
||||
|
||||
// Applies the cached viewport scale and Z offset (gstate_c) to a projection matrix, in place.
// Depth scale is halved and the Z offset is halved and shifted by 0.5 - presumably to move
// from a [-1, 1] to a [0, 1] clip-space depth range (TODO confirm).
// invertedX / invertedY are currently unused; the caller flips the matrix itself beforehand.
static void ConvertProjMatrixToVulkan(Matrix4x4 &in, bool invertedX, bool invertedY) {
	const Vec3 viewportScale(gstate_c.vpWidthScale, gstate_c.vpHeightScale, gstate_c.vpDepthScale * 0.5f);
	const Vec3 viewportOffset(0, 0, gstate_c.vpZOffset * 0.5f + 0.5f);
	in.translateAndScale(viewportOffset, viewportScale);
}
|
||||
|
||||
// Refreshes the base (vertex + fragment) uniform block from the current GE state.
//
// ub:            block to patch in place. Fields whose dirty flag is clear are left untouched.
// dirtyUniforms: mask of DIRTY_* flags selecting which groups of fields get recomputed.
void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms) {
	// --- Fragment-side colors and test references ---
	if (dirtyUniforms & DIRTY_TEXENV) {
		Uint8x3ToFloat4(ub->texEnvColor, gstate.texenvcolor);
	}
	if (dirtyUniforms & DIRTY_ALPHACOLORREF) {
		// The alpha reference is pre-masked with the alpha test mask.
		Uint8x3ToInt4_Alpha(ub->alphaColorRef, gstate.getColorTestRef(), gstate.getAlphaTestRef() & gstate.getAlphaTestMask());
	}
	if (dirtyUniforms & DIRTY_ALPHACOLORMASK) {
		Uint8x3ToInt4_Alpha(ub->colorTestMask, gstate.getColorTestMask(), gstate.getAlphaTestMask());
	}
	if (dirtyUniforms & DIRTY_FOGCOLOR) {
		Uint8x3ToFloat4(ub->fogColor, gstate.fogcolor);
	}
	if (dirtyUniforms & DIRTY_SHADERBLEND) {
		Uint8x3ToFloat4(ub->blendFixA, gstate.getFixA());
		Uint8x3ToFloat4(ub->blendFixB, gstate.getFixB());
	}

	if (dirtyUniforms & DIRTY_TEXCLAMP) {
		const float invTexW = 1.0f / (float)gstate_c.curTextureWidth;
		const float invTexH = 1.0f / (float)gstate_c.curTextureHeight;
		const int gstateW = gstate.getTextureWidth(0);
		const int gstateH = gstate.getTextureHeight(0);

		// .xy: wrap factors (GE texture size relative to the bound texture), .zw: half a texel (for clamp).
		ub->texClamp[0] = (float)gstateW * invTexW;
		ub->texClamp[1] = (float)gstateH * invTexH;
		ub->texClamp[2] = invTexW * 0.5f;
		ub->texClamp[3] = invTexH * 0.5f;
		ub->texClampOffset[0] = gstate_c.curTextureXOffset * invTexW;
		ub->texClampOffset[1] = gstate_c.curTextureYOffset * invTexH;
	}

	if (dirtyUniforms & DIRTY_PROJMATRIX) {
		Matrix4x4 projection;
		memcpy(&projection, gstate.projMatrix, 16 * sizeof(float));

		// A negative viewport extent means that axis is mirrored: negate the matching
		// component (index 1 for Y, index 0 for X) in each group of four elements.
		const bool invertedY = gstate_c.vpHeight < 0;
		const bool invertedX = gstate_c.vpWidth < 0;
		for (int base = 0; base < 16; base += 4) {
			if (invertedY) {
				projection[base + 1] = -projection[base + 1];
			}
			if (invertedX) {
				projection[base] = -projection[base];
			}
		}
		ConvertProjMatrixToVulkan(projection, invertedX, invertedY);
		CopyMatrix4x4(ub->proj, projection.getReadPtr());
	}

	if (dirtyUniforms & DIRTY_PROJTHROUGHMATRIX) {
		// Orthographic projection covering the current render target, used in through mode.
		Matrix4x4 throughProjection;
		throughProjection.setOrthoVulkan(0.0f, gstate_c.curRTWidth, 0, gstate_c.curRTHeight, 0, 1);
		CopyMatrix4x4(ub->proj_through, throughProjection.getReadPtr());
	}

	// --- Transform matrices (stored 4x3 in gstate, expanded to 4x4 here) ---
	if (dirtyUniforms & DIRTY_WORLDMATRIX) {
		ConvertMatrix4x3To4x4(ub->world, gstate.worldMatrix);
	}
	if (dirtyUniforms & DIRTY_VIEWMATRIX) {
		ConvertMatrix4x3To4x4(ub->view, gstate.viewMatrix);
	}
	if (dirtyUniforms & DIRTY_TEXMATRIX) {
		ConvertMatrix4x3To4x4(ub->tex, gstate.tgenMatrix);
	}

	// Two small uniforms share one slot: fog coefficients in .xy, stencil reference in .z.
	if (dirtyUniforms & (DIRTY_FOGCOEF | DIRTY_STENCILREPLACEVALUE)) {
		float fogStencil[3] = {
			getFloat24(gstate.fog1),
			getFloat24(gstate.fog2),
			(float)gstate.getStencilTestRef()
		};
		if (my_isinf(fogStencil[1])) {
			// No obviously sensible replacement exists; clamp to a large finite value of the same sign.
			fogStencil[1] = fogStencil[1] < 0.0f ? -10000.0f : 10000.0f;
		} else if (my_isnan(fogStencil[1])) {
			// Workaround for https://github.com/hrydgard/ppsspp/issues/5384#issuecomment-38365988
			// Push the fog far away at a large finite distance instead. Infinities and NaNs are
			// rather unpredictable in shaders on many GPUs, so keep the calculation sane.
			fogStencil[0] = 100000.0f;
			fogStencil[1] = 1.0f;
		}
#ifndef MOBILE_DEVICE
		else if (my_isnanorinf(fogStencil[1]) || my_isnanorinf(fogStencil[0])) {
			ERROR_LOG_REPORT_ONCE(fognan, G3D, "Unhandled fog NaN/INF combo: %f %f", fogStencil[0], fogStencil[1]);
		}
#endif
		CopyFloat3(ub->fogCoef_stencil, fogStencil);
	}

	// Lives here rather than with the lights because it has uses beyond lighting.
	if (dirtyUniforms & DIRTY_MATAMBIENTALPHA) {
		Uint8x3ToFloat4_AlphaUint8(ub->matAmbient, gstate.materialambient, gstate.getMaterialAmbientA());
	}

	// --- Texturing ---
	if (dirtyUniforms & DIRTY_UVSCALEOFFSET) {
		const float invTexW = 1.0f / (float)gstate_c.curTextureWidth;
		const float invTexH = 1.0f / (float)gstate_c.curTextureHeight;
		const int gstateW = gstate.getTextureWidth(0);
		const int gstateH = gstate.getTextureHeight(0);

		// .xy: scale, .zw: offset (always zero here).
		ub->uvScaleOffset[0] = (float)gstateW * invTexW;
		ub->uvScaleOffset[1] = (float)gstateH * invTexH;
		ub->uvScaleOffset[2] = 0.0f;
		ub->uvScaleOffset[3] = 0.0f;
	}

	if (dirtyUniforms & DIRTY_DEPTHRANGE) {
		float viewZScale = gstate.getViewportZScale();
		float viewZCenter = gstate.getViewportZCenter();

		// Z was scaled and translated to account for our clamped Z range, so that has to be
		// reversed here to round properly.
		//
		// Example: scale = 65535.0, center = 0.0
		// Resulting range = -65535 to 65535, clamped to [0, 65535]
		// gstate_c.vpDepthScale = 2.0f
		// gstate_c.vpZOffset = -1.0f
		//
		// The projection already accounts for those, so they are undone here.
		// Additionally the target depth range is [0, 1], so the scale is doubled and the
		// center moved. (The original note named D3D9 - NOTE(review): likely inherited from
		// the backend this code was copied from.)
		viewZScale *= (1.0f / gstate_c.vpDepthScale) * 2.0f;
		viewZCenter -= 65535.0f * gstate_c.vpZOffset + 32768.5f;

		// Guard the reciprocal against a zero scale.
		const float viewZInvScale = (viewZScale != 0.0) ? 1.0f / viewZScale : 0.0f;

		ub->depthRange[0] = viewZScale;
		ub->depthRange[1] = viewZCenter;
		ub->depthRange[2] = viewZCenter;
		ub->depthRange[3] = viewZInvScale;
	}
}
|
||||
|
||||
// Refreshes the lighting uniform block from the current GE state.
//
// ub:            block to patch in place. Fields whose dirty flag is clear are left untouched.
// dirtyUniforms: mask of DIRTY_* flags; DIRTY_LIGHT0 << i selects light i (0..3).
void LightUpdateUniforms(UB_VS_Lights *ub, uint64_t dirtyUniforms) {
	// Global ambient and material colors.
	if (dirtyUniforms & DIRTY_AMBIENT) {
		Uint8x3ToFloat4_AlphaUint8(ub->ambientColor, gstate.ambientcolor, gstate.getAmbientA());
	}
	if (dirtyUniforms & DIRTY_MATDIFFUSE) {
		Uint8x3ToFloat4(ub->materialDiffuse, gstate.materialdiffuse);
	}
	if (dirtyUniforms & DIRTY_MATEMISSIVE) {
		Uint8x3ToFloat4(ub->materialEmissive, gstate.materialemissive);
	}
	if (dirtyUniforms & DIRTY_MATSPECULAR) {
		// The specular coefficient travels in the alpha slot.
		Uint8x3ToFloat4_Alpha(ub->materialSpecular, gstate.materialspecular, getFloat24(gstate.materialspecularcoef));
	}

	for (int light = 0; light < 4; light++) {
		if (!(dirtyUniforms & (DIRTY_LIGHT0 << light))) {
			continue;
		}

		if (gstate.isDirectionalLight(light)) {
			// Directional lights are uploaded pre-normalized.
			const float x = getFloat24(gstate.lpos[light * 3 + 0]);
			const float y = getFloat24(gstate.lpos[light * 3 + 1]);
			const float z = getFloat24(gstate.lpos[light * 3 + 2]);
			const float length = sqrtf(x*x + y*y + z*z);
			// A zero-length vector is passed through unscaled rather than divided by zero.
			const float invLength = (length == 0.0f) ? 1.0f : 1.0f / length;
			float direction[3] = { x * invLength, y * invLength, z * invLength };
			CopyFloat3To4(ub->lpos[light], direction);
		} else {
			ExpandFloat24x3ToFloat4(ub->lpos[light], &gstate.lpos[light * 3]);
		}

		ExpandFloat24x3ToFloat4(ub->ldir[light], &gstate.ldir[light * 3]);
		ExpandFloat24x3ToFloat4(ub->latt[light], &gstate.latt[light * 3]);
		CopyFloat1To4(ub->lightAngle[light], getFloat24(gstate.lcutoff[light]));
		CopyFloat1To4(ub->lightSpotCoef[light], getFloat24(gstate.lconv[light]));

		// lcolor holds three consecutive entries per light: ambient, diffuse, specular.
		Uint8x3ToFloat4(ub->lightAmbient[light], gstate.lcolor[light * 3]);
		Uint8x3ToFloat4(ub->lightDiffuse[light], gstate.lcolor[light * 3 + 1]);
		Uint8x3ToFloat4(ub->lightSpecular[light], gstate.lcolor[light * 3 + 2]);
	}
}
|
||||
|
||||
// Refreshes the bone matrices whose dirty flag is set.
//
// ub:            block to patch in place; clean bones are left untouched.
// dirtyUniforms: mask of DIRTY_* flags; DIRTY_BONEMATRIX0 << i selects bone i (0..7).
void BoneUpdateUniforms(UB_VS_Bones *ub, uint64_t dirtyUniforms) {
	for (int bone = 0; bone < 8; bone++) {
		if (!(dirtyUniforms & (DIRTY_BONEMATRIX0 << bone))) {
			continue;
		}
		// Each bone occupies 12 floats (4x3) in gstate and is expanded to a full 4x4 here.
		ConvertMatrix4x3To4x4(ub->bones[bone], gstate.boneMatrix + 12 * bone);
	}
}
|
147
GPU/Common/ShaderUniforms.h
Normal file
147
GPU/Common/ShaderUniforms.h
Normal file
@ -0,0 +1,147 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
#include "ShaderCommon.h"
|
||||
|
||||
// Used by the "modern" backends that use uniform buffers. They can share this without issue.
|
||||
|
||||
// Pretty much full. Will need more bits for more fine grained dirty tracking for lights.
|
||||
enum : uint64_t {
|
||||
DIRTY_BASE_UNIFORMS =
|
||||
DIRTY_WORLDMATRIX | DIRTY_PROJTHROUGHMATRIX | DIRTY_VIEWMATRIX | DIRTY_TEXMATRIX | DIRTY_ALPHACOLORREF |
|
||||
DIRTY_PROJMATRIX | DIRTY_FOGCOLOR | DIRTY_FOGCOEF | DIRTY_TEXENV | DIRTY_STENCILREPLACEVALUE |
|
||||
DIRTY_ALPHACOLORMASK | DIRTY_SHADERBLEND | DIRTY_UVSCALEOFFSET | DIRTY_TEXCLAMP | DIRTY_DEPTHRANGE | DIRTY_MATAMBIENTALPHA,
|
||||
DIRTY_LIGHT_UNIFORMS =
|
||||
DIRTY_LIGHT0 | DIRTY_LIGHT1 | DIRTY_LIGHT2 | DIRTY_LIGHT3 |
|
||||
DIRTY_MATDIFFUSE | DIRTY_MATSPECULAR | DIRTY_MATEMISSIVE | DIRTY_AMBIENT,
|
||||
};
|
||||
|
||||
// CPU-side mirror of the base uniform/constant buffer shared by the vertex and fragment shaders.
// Member order and sizes must match the shader-side declarations in ub_baseStr / cb_baseStr:
// every non-matrix member is padded to four 32-bit components even when the shader only
// declares a vec2/vec3.
//
// TODO: Split into two structs, one for software transform and one for hardware transform, to save space.
// 512 bytes. Probably can't get to 256 (nVidia's UBO alignment).
struct UB_VS_FS_Base {
	float proj[16];
	float proj_through[16];
	float view[16];
	float world[16];
	float tex[16];  // not that common, may want to break out
	float uvScaleOffset[4];
	float depthRange[4];
	float fogCoef_stencil[4];
	float matAmbient[4];
	// Fragment data
	float fogColor[4];
	float texEnvColor[4];
	int alphaColorRef[4];
	int colorTestMask[4];
	float blendFixA[4];
	float blendFixB[4];
	float texClamp[4];
	float texClampOffset[4];
};

// The buffer is uploaded as raw bytes, so catch accidental layout changes at compile time.
static_assert(sizeof(UB_VS_FS_Base) == 512, "UB_VS_FS_Base must stay in sync with ub_baseStr / cb_baseStr");
|
||||
|
||||
// GLSL member list for the base uniform block; the members appear in the same order as the
// fields of UB_VS_FS_Base. Presumably spliced into a uniform block declaration by the shader
// generators (not visible from here).
static const char *ub_baseStr =
R"(  mat4 proj_mtx;
  mat4 proj_through_mtx;
  mat4 view_mtx;
  mat4 world_mtx;
  mat4 tex_mtx;
  vec4 uvscaleoffset;
  vec4 depthRange;
  vec3 fogcoef_stencilreplace;
  vec4 matambientalpha;
  vec3 fogcolor;
  vec3 texenv;
  ivec4 alphacolorref;
  ivec4 alphacolormask;
  vec3 blendFixA;
  vec3 blendFixB;
  vec4 texclamp;
  vec2 texclampoff;
)";
|
||||
|
||||
// HLSL member list for the base constant buffer; the members appear in the same order as the
// fields of UB_VS_FS_Base. Presumably spliced into a cbuffer declaration by the D3D11 shader
// generators (not visible from here).
//
// alphacolorref / alphacolormask are integer vectors (the C++ side stores int[4]), so they
// are declared int4. "ifloat4" is not an HLSL type and would fail to compile.
static const char *cb_baseStr =
R"(  matrix proj_mtx;
  matrix proj_through_mtx;
  matrix view_mtx;
  matrix world_mtx;
  matrix tex_mtx;
  float4 uvscaleoffset;
  float4 depthRange;
  float3 fogcoef_stencilreplace;
  float4 matambientalpha;
  float3 fogcolor;
  float3 texenv;
  int4 alphacolorref;
  int4 alphacolormask;
  float3 blendFixA;
  float3 blendFixB;
  float4 texclamp;
  float2 texclampoff;
)";
|
||||
|
||||
// CPU-side mirror of the lighting uniform/constant buffer used by the vertex shader.
// Member order must match ub_vs_lightsStr / cb_vs_lightsStr. Every per-light entry is padded
// to four floats, including the scalar angle / spot coefficient arrays.
//
// 576 bytes. Can we get down to 512?
struct UB_VS_Lights {
	float ambientColor[4];
	float materialDiffuse[4];
	float materialSpecular[4];
	float materialEmissive[4];
	float lpos[4][4];
	float ldir[4][4];
	float latt[4][4];
	float lightAngle[4][4];  // TODO: Merge with lightSpotCoef, use .xy
	float lightSpotCoef[4][4];
	float lightAmbient[4][4];
	float lightDiffuse[4][4];
	float lightSpecular[4][4];
};

// The buffer is uploaded as raw bytes, so catch accidental layout changes at compile time.
static_assert(sizeof(UB_VS_Lights) == 576, "UB_VS_Lights must stay in sync with ub_vs_lightsStr / cb_vs_lightsStr");
|
||||
|
||||
// GLSL member list for the lighting uniform block; the members appear in the same order as
// the fields of UB_VS_Lights.
static const char *ub_vs_lightsStr =
R"(  vec4 globalAmbient;
  vec3 matdiffuse;
  vec4 matspecular;
  vec3 matemissive;
  vec3 pos[4];
  vec3 dir[4];
  vec3 att[4];
  float angle[4];
  float spotCoef[4];
  vec3 ambient[4];
  vec3 diffuse[4];
  vec3 specular[4];
)";
|
||||
|
||||
// HLSL member list for the lighting constant buffer; the members appear in the same order as
// the fields of UB_VS_Lights.
static const char *cb_vs_lightsStr =
R"(  float4 globalAmbient;
  float3 matdiffuse;
  float4 matspecular;
  float3 matemissive;
  float3 pos[4];
  float3 dir[4];
  float3 att[4];
  float angle[4];
  float spotCoef[4];
  float3 ambient[4];
  float3 diffuse[4];
  float3 specular[4];
)";
|
||||
|
||||
// CPU-side mirror of the bone matrix uniform/constant buffer: eight 4x4 float matrices,
// matching ub_vs_bonesStr / cb_vs_bonesStr.
//
// With some cleverness, we could get away with uploading just half this when only the four first
// bones are being used. This is 512b, 256b would be great.
// Could also move to 4x3 matrices - would let us fit 5 bones into 256b.
struct UB_VS_Bones {
	float bones[8][16];
};

// The buffer is uploaded as raw bytes, so catch accidental layout changes at compile time.
static_assert(sizeof(UB_VS_Bones) == 512, "UB_VS_Bones must stay in sync with ub_vs_bonesStr / cb_vs_bonesStr");
|
||||
|
||||
// GLSL member list for the bone uniform block: eight 4x4 matrices, mirroring UB_VS_Bones.
static const char *ub_vs_bonesStr =
R"(  mat4 m[8];
)";
|
||||
|
||||
// HLSL member list for the bone constant buffer: eight 4x4 matrices, mirroring UB_VS_Bones.
static const char *cb_vs_bonesStr =
R"(  matrix m[8];
)";
|
||||
|
||||
void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms);
|
||||
void LightUpdateUniforms(UB_VS_Lights *ub, uint64_t dirtyUniforms);
|
||||
void BoneUpdateUniforms(UB_VS_Bones *ub, uint64_t dirtyUniforms);
|
||||
|
5
GPU/D3D11/FragmentShaderGeneratorD3D11.cpp
Normal file
5
GPU/D3D11/FragmentShaderGeneratorD3D11.cpp
Normal file
@ -0,0 +1,5 @@
|
||||
#include "GPU/D3D11/FragmentShaderGeneratorD3D11.h"
|
||||
|
||||
// Generates the D3D11 fragment shader source for the given shader ID into `buffer`.
//
// id:     shader ID describing the required fragment shader features (unused for now).
// buffer: caller-provided output buffer that receives a NUL-terminated source string.
//
// Still a stub: no code is generated yet. An empty string is written so that callers,
// which read `buffer` as a C string, never see stale or uninitialized contents.
void GenerateFragmentShaderD3D11(const ShaderID &id, char *buffer) {
	(void)id;  // TODO: emit HLSL based on the shader ID bits.
	if (buffer) {
		buffer[0] = '\0';
	}
}
|
5
GPU/D3D11/FragmentShaderGeneratorD3D11.h
Normal file
5
GPU/D3D11/FragmentShaderGeneratorD3D11.h
Normal file
@ -0,0 +1,5 @@
|
||||
#pragma once
|
||||
|
||||
#include "GPU/Common/ShaderId.h"
|
||||
|
||||
void GenerateFragmentShaderD3D11(const ShaderID &id, char *buffer);
|
273
GPU/D3D11/ShaderManagerD3D11.cpp
Normal file
273
GPU/D3D11/ShaderManagerD3D11.cpp
Normal file
@ -0,0 +1,273 @@
|
||||
// Copyright (c) 2015- PPSSPP Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0 or later versions.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official git repository and contact information can be found at
|
||||
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
||||
|
||||
#ifdef _WIN32
|
||||
#define SHADERLOG
|
||||
#endif
|
||||
|
||||
#include <d3d11.h>
|
||||
#include <d3dcompiler.h>
|
||||
|
||||
#include <map>
|
||||
|
||||
#include "base/logging.h"
|
||||
#include "math/lin/matrix4x4.h"
|
||||
#include "math/math_util.h"
|
||||
#include "math/dataconv.h"
|
||||
#include "util/text/utf8.h"
|
||||
#include "thin3d/d3d11_loader.h"
|
||||
#include "Common/Common.h"
|
||||
#include "Core/Config.h"
|
||||
#include "Core/Reporting.h"
|
||||
#include "GPU/Math3D.h"
|
||||
#include "GPU/GPUState.h"
|
||||
#include "GPU/ge_constants.h"
|
||||
#include "GPU/D3D11/ShaderManagerD3D11.h"
|
||||
#include "GPU/D3D11/FragmentShaderGeneratorD3D11.h"
|
||||
#include "GPU/D3D11/VertexShaderGeneratorD3D11.h"
|
||||
|
||||
// Wraps a fragment (pixel) shader for the given shader ID.
//
// device:         D3D11 device used to create the shader object.
// id:             shader ID this shader was generated for.
// code:           NUL-terminated shader source; kept for debugging via source().
// useHWTransform: recorded and reported through UseHWTransform().
//
// Check Failed() after construction. Compilation of `code` to bytecode is not implemented
// yet, so for now the shader is always marked failed instead of handing an uninitialized
// pointer and size to CreatePixelShader.
D3D11FragmentShader::D3D11FragmentShader(ID3D11Device *device, ShaderID id, const char *code, bool useHWTransform)
	: module_(nullptr), device_(device), failed_(false), useHWTransform_(useHWTransform), id_(id) {
	source_ = code;

#ifdef SHADERLOG
	OutputDebugStringA(code);
#endif

	// TODO: Compile source_ and point these at the resulting blob.
	uint8_t *bytecode = nullptr;
	UINT bytecodeSize = 0;
	if (!bytecode || bytecodeSize == 0) {
		failed_ = true;
		return;
	}

	HRESULT hr = device_->CreatePixelShader(bytecode, bytecodeSize, nullptr, &module_);
	if (FAILED(hr)) {
		// Make sure the destructor never releases a bogus pointer.
		module_ = nullptr;
		failed_ = true;
		return;
	}
}
|
||||
|
||||
// Drops the reference held on the pixel shader object, if one was created.
D3D11FragmentShader::~D3D11FragmentShader() {
	if (module_ != nullptr) {
		module_->Release();
	}
}
|
||||
|
||||
// Returns a debug string for this shader: the stored source code, a short description
// derived from the shader ID, or "N/A" for any other string type.
std::string D3D11FragmentShader::GetShaderString(DebugShaderStringType type) const {
	if (type == SHADER_STRING_SOURCE_CODE) {
		return source_;
	}
	if (type == SHADER_STRING_SHORT_DESC) {
		return FragmentShaderDesc(id_);
	}
	return "N/A";
}
|
||||
|
||||
// Wraps a vertex shader for the given shader ID.
//
// device:         D3D11 device used to create the shader object.
// id:             shader ID this shader was generated for.
// code:           NUL-terminated shader source; kept for debugging via source().
// vertType:       vertex type the shader was requested for (currently unused).
// useHWTransform: recorded and reported through UseHWTransform().
// usesLighting:   recorded and reported through HasLights().
//
// Check Failed() after construction. Compilation of `code` to bytecode is not implemented
// yet, so for now the shader is always marked failed instead of handing an uninitialized
// pointer and size to CreateVertexShader.
D3D11VertexShader::D3D11VertexShader(ID3D11Device *device, ShaderID id, const char *code, int vertType, bool useHWTransform, bool usesLighting)
	: module_(nullptr), device_(device), failed_(false), useHWTransform_(useHWTransform), usesLighting_(usesLighting), id_(id) {
	(void)vertType;
	source_ = code;

#ifdef SHADERLOG
	OutputDebugStringA(code);
#endif

	// TODO: Compile source_ and point these at the resulting blob.
	uint8_t *bytecode = nullptr;
	UINT bytecodeSize = 0;
	if (!bytecode || bytecodeSize == 0) {
		failed_ = true;
		return;
	}

	HRESULT hr = device_->CreateVertexShader(bytecode, bytecodeSize, nullptr, &module_);
	if (FAILED(hr)) {
		// Make sure the destructor never releases a bogus pointer.
		module_ = nullptr;
		failed_ = true;
		return;
	}
}
|
||||
|
||||
// Drops the reference held on the vertex shader object, if one was created.
D3D11VertexShader::~D3D11VertexShader() {
	if (module_ != nullptr) {
		module_->Release();
	}
}
|
||||
|
||||
// Returns a debug string for this shader: the stored source code, a short description
// derived from the shader ID, or "N/A" for any other string type.
std::string D3D11VertexShader::GetShaderString(DebugShaderStringType type) const {
	if (type == SHADER_STRING_SOURCE_CODE) {
		return source_;
	}
	if (type == SHADER_STRING_SHORT_DESC) {
		return VertexShaderDesc(id_);
	}
	return "N/A";
}
|
||||
|
||||
// Creates an empty shader cache bound to the given device and context.
// Allocates the 16 KB scratch buffer that shader source is generated into and zeroes the
// three CPU-side uniform blocks.
ShaderManagerD3D11::ShaderManagerD3D11(ID3D11Device *device, ID3D11DeviceContext *context)
	: device_(device), context_(context), codeBuffer_(nullptr), lastFShader_(nullptr), lastVShader_(nullptr) {
	codeBuffer_ = new char[16384];
	// Start out as an empty C string so nothing ever reads uninitialized bytes from it,
	// even if a generator returns without writing anything.
	codeBuffer_[0] = '\0';

	memset(&ub_base, 0, sizeof(ub_base));
	memset(&ub_lights, 0, sizeof(ub_lights));
	memset(&ub_bones, 0, sizeof(ub_bones));

	ILOG("sizeof(ub_base): %d", (int)sizeof(ub_base));
	ILOG("sizeof(ub_lights): %d", (int)sizeof(ub_lights));
	ILOG("sizeof(ub_bones): %d", (int)sizeof(ub_bones));
}
|
||||
|
||||
// Destroys every cached shader (via ClearShaders) and then frees the source scratch buffer.
ShaderManagerD3D11::~ShaderManagerD3D11() {
	ClearShaders();

	delete[] codeBuffer_;
}
|
||||
|
||||
// Deletes every cached fragment and vertex shader, empties both caches and forgets the
// last-used shader IDs. The last-used shader pointers are not touched here.
void ShaderManagerD3D11::Clear() {
	for (auto &entry : fsCache_) {
		delete entry.second;
	}
	for (auto &entry : vsCache_) {
		delete entry.second;
	}

	fsCache_.clear();
	vsCache_.clear();

	lastFSID_.clear();
	lastVSID_.clear();
}
|
||||
|
||||
// Throws away all cached shaders, resets the last-used shader tracking and marks every
// uniform dirty.
void ShaderManagerD3D11::ClearShaders() {
	Clear();         // delete cached shaders and empty the caches
	DirtyShader();   // forget the last-used IDs and pointers
	gstate_c.Dirty(DIRTY_ALL_UNIFORMS);
}
|
||||
|
||||
// Forgets both the last-used shader IDs and the last-used shader pointers, so the next
// GetShaders() call goes through the cache lookup again.
void ShaderManagerD3D11::DirtyShader() {
	lastVSID_.clear();
	lastFSID_.clear();

	lastFShader_ = nullptr;
	lastVShader_ = nullptr;
}
|
||||
|
||||
// Forgets only the last-used shader pointers; the last-used IDs are kept. This is enough to
// defeat the "same shader as last time" fast path in GetShaders().
// (The original note here read "disables vertex arrays" - NOTE(review): nothing in this
// function does that; the remark looks inherited from another backend.)
void ShaderManagerD3D11::DirtyLastShader() {
	lastFShader_ = nullptr;
	lastVShader_ = nullptr;
}
|
||||
|
||||
uint64_t ShaderManagerD3D11::UpdateUniforms() {
|
||||
uint64_t dirty = gstate_c.GetDirtyUniforms();
|
||||
if (dirty != 0) {
|
||||
if (dirty & DIRTY_BASE_UNIFORMS)
|
||||
BaseUpdateUniforms(&ub_base, dirty);
|
||||
if (dirty & DIRTY_LIGHT_UNIFORMS)
|
||||
LightUpdateUniforms(&ub_lights, dirty);
|
||||
if (dirty & DIRTY_BONE_UNIFORMS)
|
||||
BoneUpdateUniforms(&ub_bones, dirty);
|
||||
}
|
||||
gstate_c.CleanUniforms();
|
||||
return dirty;
|
||||
}
|
||||
|
||||
void ShaderManagerD3D11::GetShaders(int prim, u32 vertType, D3D11VertexShader **vshader, D3D11FragmentShader **fshader, bool useHWTransform) {
|
||||
ShaderID VSID;
|
||||
ShaderID FSID;
|
||||
ComputeVertexShaderID(&VSID, vertType, useHWTransform);
|
||||
ComputeFragmentShaderID(&FSID);
|
||||
|
||||
// Just update uniforms if this is the same shader as last time.
|
||||
if (lastVShader_ != nullptr && lastFShader_ != nullptr && VSID == lastVSID_ && FSID == lastFSID_) {
|
||||
*vshader = lastVShader_;
|
||||
*fshader = lastFShader_;
|
||||
// Already all set, no need to look up in shader maps.
|
||||
return;
|
||||
}
|
||||
|
||||
VSCache::iterator vsIter = vsCache_.find(VSID);
|
||||
D3D11VertexShader *vs;
|
||||
if (vsIter == vsCache_.end()) {
|
||||
// Vertex shader not in cache. Let's compile it.
|
||||
bool usesLighting;
|
||||
GenerateVertexShaderD3D11(VSID, codeBuffer_, &usesLighting);
|
||||
vs = new D3D11VertexShader(device_, VSID, codeBuffer_, vertType, useHWTransform, usesLighting);
|
||||
vsCache_[VSID] = vs;
|
||||
} else {
|
||||
vs = vsIter->second;
|
||||
}
|
||||
lastVSID_ = VSID;
|
||||
|
||||
FSCache::iterator fsIter = fsCache_.find(FSID);
|
||||
D3D11FragmentShader *fs;
|
||||
if (fsIter == fsCache_.end()) {
|
||||
// Fragment shader not in cache. Let's compile it.
|
||||
GenerateFragmentShaderD3D11(FSID, codeBuffer_);
|
||||
fs = new D3D11FragmentShader(device_, FSID, codeBuffer_, useHWTransform);
|
||||
fsCache_[FSID] = fs;
|
||||
} else {
|
||||
fs = fsIter->second;
|
||||
}
|
||||
|
||||
lastFSID_ = FSID;
|
||||
|
||||
lastVShader_ = vs;
|
||||
lastFShader_ = fs;
|
||||
|
||||
*vshader = vs;
|
||||
*fshader = fs;
|
||||
}
|
||||
|
||||
// Returns the string form of every cached shader ID of the requested type, in cache (map)
// order. Types other than vertex / fragment yield an empty list.
std::vector<std::string> ShaderManagerD3D11::DebugGetShaderIDs(DebugShaderType type) {
	std::string id;
	std::vector<std::string> ids;
	switch (type) {
	case SHADER_TYPE_VERTEX:
		ids.reserve(vsCache_.size());
		// Iterate by reference: copying each (ShaderID, pointer) entry is pointless work.
		for (const auto &entry : vsCache_) {
			entry.first.ToString(&id);
			ids.push_back(id);
		}
		break;
	case SHADER_TYPE_FRAGMENT:
		ids.reserve(fsCache_.size());
		for (const auto &entry : fsCache_) {
			entry.first.ToString(&id);
			ids.push_back(id);
		}
		break;
	default:
		break;
	}
	return ids;
}
|
||||
|
||||
std::string ShaderManagerD3D11::DebugGetShaderString(std::string id, DebugShaderType type, DebugShaderStringType stringType) {
|
||||
ShaderID shaderId;
|
||||
shaderId.FromString(id);
|
||||
switch (type) {
|
||||
case SHADER_TYPE_VERTEX:
|
||||
{
|
||||
auto iter = vsCache_.find(shaderId);
|
||||
if (iter == vsCache_.end()) {
|
||||
return "";
|
||||
}
|
||||
return iter->second->GetShaderString(stringType);
|
||||
}
|
||||
|
||||
case SHADER_TYPE_FRAGMENT:
|
||||
{
|
||||
auto iter = fsCache_.find(shaderId);
|
||||
if (iter == fsCache_.end()) {
|
||||
return "";
|
||||
}
|
||||
return iter->second->GetShaderString(stringType);
|
||||
}
|
||||
default:
|
||||
return "N/A";
|
||||
}
|
||||
}
|
145
GPU/D3D11/ShaderManagerD3D11.h
Normal file
145
GPU/D3D11/ShaderManagerD3D11.h
Normal file
@ -0,0 +1,145 @@
|
||||
// Copyright (c) 2017- PPSSPP Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0 or later versions.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official git repository and contact information can be found at
|
||||
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <map>
|
||||
|
||||
#include <d3d11.h>
|
||||
|
||||
#include "base/basictypes.h"
|
||||
#include "Globals.h"
|
||||
#include "GPU/Common/ShaderCommon.h"
|
||||
#include "GPU/Common/ShaderId.h"
|
||||
// #include "GPU/D3D11/VertexShaderGeneratorD3D11.h"
|
||||
// #include "GPU/D3D11/FragmentShaderGeneratorD3D11.h"
|
||||
#include "math/lin/matrix4x4.h"
|
||||
#include "GPU/Common/ShaderUniforms.h"
|
||||
|
||||
class D3D11Context;
|
||||
class D3D11PushBuffer;
|
||||
|
||||
class D3D11FragmentShader {
|
||||
public:
|
||||
D3D11FragmentShader(ID3D11Device *device, ShaderID id, const char *code, bool useHWTransform);
|
||||
~D3D11FragmentShader();
|
||||
|
||||
const std::string &source() const { return source_; }
|
||||
|
||||
bool Failed() const { return failed_; }
|
||||
bool UseHWTransform() const { return useHWTransform_; }
|
||||
|
||||
std::string GetShaderString(DebugShaderStringType type) const;
|
||||
ID3D11PixelShader *GetShader() const { return module_; }
|
||||
|
||||
protected:
|
||||
ID3D11PixelShader *module_;
|
||||
|
||||
ID3D11Device *device_;
|
||||
std::string source_;
|
||||
bool failed_;
|
||||
bool useHWTransform_;
|
||||
ShaderID id_;
|
||||
};
|
||||
|
||||
class D3D11VertexShader {
|
||||
public:
|
||||
D3D11VertexShader(ID3D11Device *device, ShaderID id, const char *code, int vertType, bool useHWTransform, bool usesLighting);
|
||||
~D3D11VertexShader();
|
||||
|
||||
const std::string &source() const { return source_; }
|
||||
|
||||
bool Failed() const { return failed_; }
|
||||
bool UseHWTransform() const { return useHWTransform_; }
|
||||
bool HasBones() const {
|
||||
return id_.Bit(VS_BIT_ENABLE_BONES);
|
||||
}
|
||||
bool HasLights() const {
|
||||
return usesLighting_;
|
||||
}
|
||||
|
||||
std::string GetShaderString(DebugShaderStringType type) const;
|
||||
ID3D11VertexShader *GetModule() const { return module_; }
|
||||
|
||||
protected:
|
||||
ID3D11VertexShader *module_;
|
||||
|
||||
ID3D11Device *device_;
|
||||
std::string source_;
|
||||
bool failed_;
|
||||
bool useHWTransform_;
|
||||
bool usesLighting_;
|
||||
ShaderID id_;
|
||||
};
|
||||
|
||||
class D3D11PushBuffer;
|
||||
|
||||
class ShaderManagerD3D11 : public ShaderManagerCommon {
|
||||
public:
|
||||
ShaderManagerD3D11(ID3D11Device *device, ID3D11DeviceContext *context);
|
||||
~ShaderManagerD3D11();
|
||||
|
||||
void GetShaders(int prim, u32 vertType, D3D11VertexShader **vshader, D3D11FragmentShader **fshader, bool useHWTransform);
|
||||
void ClearShaders();
|
||||
void DirtyShader();
|
||||
void DirtyLastShader();
|
||||
|
||||
int GetNumVertexShaders() const { return (int)vsCache_.size(); }
|
||||
int GetNumFragmentShaders() const { return (int)fsCache_.size(); }
|
||||
|
||||
std::vector<std::string> DebugGetShaderIDs(DebugShaderType type);
|
||||
std::string DebugGetShaderString(std::string id, DebugShaderType type, DebugShaderStringType stringType);
|
||||
|
||||
uint64_t UpdateUniforms();
|
||||
|
||||
// TODO: Avoid copying these buffers if same as last draw, can still point to it assuming we're still in the same pushbuffer.
|
||||
// Applies dirty changes and copies the buffer.
|
||||
bool IsBaseDirty() { return true; }
|
||||
bool IsLightDirty() { return true; }
|
||||
bool IsBoneDirty() { return true; }
|
||||
|
||||
/*
|
||||
uint32_t PushBaseBuffer(D3D11PushBuffer *dest, VkBuffer *buf);
|
||||
uint32_t PushLightBuffer(D3D11PushBuffer *dest, VkBuffer *buf);
|
||||
uint32_t PushBoneBuffer(D3D11PushBuffer *dest, VkBuffer *buf);
|
||||
*/
|
||||
|
||||
private:
|
||||
void Clear();
|
||||
|
||||
ID3D11Device *device_;
|
||||
ID3D11DeviceContext *context_;
|
||||
|
||||
typedef std::map<ShaderID, D3D11FragmentShader *> FSCache;
|
||||
FSCache fsCache_;
|
||||
|
||||
typedef std::map<ShaderID, D3D11VertexShader *> VSCache;
|
||||
VSCache vsCache_;
|
||||
|
||||
char *codeBuffer_;
|
||||
|
||||
// Uniform block scratchpad. These (the relevant ones) are copied to the current pushbuffer at draw time.
|
||||
UB_VS_FS_Base ub_base;
|
||||
UB_VS_Lights ub_lights;
|
||||
UB_VS_Bones ub_bones;
|
||||
|
||||
D3D11FragmentShader *lastFShader_;
|
||||
D3D11VertexShader *lastVShader_;
|
||||
|
||||
ShaderID lastFSID_;
|
||||
ShaderID lastVSID_;
|
||||
};
|
379
GPU/D3D11/StateMappingD3D11.cpp
Normal file
379
GPU/D3D11/StateMappingD3D11.cpp
Normal file
@ -0,0 +1,379 @@
|
||||
// Copyright (c) 2012- PPSSPP Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0 or later versions.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official git repository and contact information can be found at
|
||||
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
||||
|
||||
#include <d3d11.h>
|
||||
|
||||
#include "GPU/Math3D.h"
|
||||
#include "GPU/GPUState.h"
|
||||
#include "GPU/ge_constants.h"
|
||||
#include "GPU/Common/GPUStateUtils.h"
|
||||
#include "Core/System.h"
|
||||
#include "Core/Config.h"
|
||||
#include "Core/Reporting.h"
|
||||
|
||||
#include "GPU/Common/FramebufferCommon.h"
|
||||
|
||||
// These tables all fit into u8s.
|
||||
// Maps a BlendFactor value (used as the index; the array is sized by BlendFactor::COUNT) to
// the D3D11 blend factor. The BLEND_FACTOR / INV_BLEND_FACTOR pair appears twice in a row -
// presumably the constant-color and constant-alpha factors both map onto D3D11's single
// blend factor (TODO confirm against the BlendFactor enum).
static const D3D11_BLEND d3d11BlendFactorLookup[(size_t)BlendFactor::COUNT] = {
	D3D11_BLEND_ZERO,              // 0
	D3D11_BLEND_ONE,               // 1
	D3D11_BLEND_SRC_COLOR,         // 2
	D3D11_BLEND_INV_SRC_COLOR,     // 3
	D3D11_BLEND_DEST_COLOR,        // 4
	D3D11_BLEND_INV_DEST_COLOR,    // 5
	D3D11_BLEND_SRC_ALPHA,         // 6
	D3D11_BLEND_INV_SRC_ALPHA,     // 7
	D3D11_BLEND_DEST_ALPHA,        // 8
	D3D11_BLEND_INV_DEST_ALPHA,    // 9
	D3D11_BLEND_BLEND_FACTOR,      // 10
	D3D11_BLEND_INV_BLEND_FACTOR,  // 11
	D3D11_BLEND_BLEND_FACTOR,      // 12
	D3D11_BLEND_INV_BLEND_FACTOR,  // 13
	D3D11_BLEND_SRC1_COLOR,        // 14
	D3D11_BLEND_INV_SRC1_COLOR,    // 15
	D3D11_BLEND_SRC1_ALPHA,        // 16
	D3D11_BLEND_INV_SRC1_ALPHA,    // 17
};
|
||||
|
||||
// Maps a generic BlendEq (used as the array index) to the matching D3D11_BLEND_OP value.
// Sized by BlendEq::COUNT, so the order must follow the BlendEq enum declaration.
static const D3D11_BLEND_OP d3d11BlendEqLookup[static_cast<size_t>(BlendEq::COUNT)] = {
	D3D11_BLEND_OP_ADD,           // 0
	D3D11_BLEND_OP_SUBTRACT,      // 1
	D3D11_BLEND_OP_REV_SUBTRACT,  // 2
	D3D11_BLEND_OP_MIN,           // 3
	D3D11_BLEND_OP_MAX,           // 4
};
|
||||
|
||||
// Cull mode lookup with two entries: index 0 culls back faces, index 1 culls front faces.
// NOTE(review): nothing in this file reads this table; ConvertStateToKeys picks the cull
// mode with an inline conditional that yields the same mapping.
static const D3D11_CULL_MODE cullingMode[2] = {
	D3D11_CULL_BACK,   // 0
	D3D11_CULL_FRONT,  // 1
};
|
||||
|
||||
// Maps a GE comparison function (array index 0-7, as returned by e.g.
// gstate.getDepthTestFunction()) to the matching D3D11_COMPARISON_FUNC.
// Note that the D3D11 enum values themselves run from 1 (NEVER) to 8 (ALWAYS), so they
// need four bits when stored in a packed key.
static const D3D11_COMPARISON_FUNC compareOps[8] = {
	D3D11_COMPARISON_NEVER,          // 0
	D3D11_COMPARISON_ALWAYS,         // 1
	D3D11_COMPARISON_EQUAL,          // 2
	D3D11_COMPARISON_NOT_EQUAL,      // 3
	D3D11_COMPARISON_LESS,           // 4
	D3D11_COMPARISON_LESS_EQUAL,     // 5
	D3D11_COMPARISON_GREATER,        // 6
	D3D11_COMPARISON_GREATER_EQUAL,  // 7
};
|
||||
|
||||
// Maps a GE stencil operation (array index 0-7) to the matching D3D11_STENCIL_OP.
// The last two indices are reserved on the GE side and fall back to KEEP.
static const D3D11_STENCIL_OP stencilOps[8] = {
	D3D11_STENCIL_OP_KEEP,      // 0
	D3D11_STENCIL_OP_ZERO,      // 1
	D3D11_STENCIL_OP_REPLACE,   // 2
	D3D11_STENCIL_OP_INVERT,    // 3
	D3D11_STENCIL_OP_INCR_SAT,  // 4
	D3D11_STENCIL_OP_DECR_SAT,  // 5
	D3D11_STENCIL_OP_KEEP,      // 6 - reserved
	D3D11_STENCIL_OP_KEEP,      // 7 - reserved
};
|
||||
|
||||
// Maps a GE primitive type (array index 0-7) to a D3D11 primitive topology.
// UNDEFINED marks primitive types that have no direct D3D11 equivalent.
static const D3D11_PRIMITIVE_TOPOLOGY primToD3D11[8] = {
	D3D11_PRIMITIVE_TOPOLOGY_POINTLIST,      // 0
	D3D11_PRIMITIVE_TOPOLOGY_LINELIST,       // 1
	D3D11_PRIMITIVE_TOPOLOGY_LINESTRIP,      // 2
	D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST,   // 3
	D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP,  // 4
	D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED,      // 5 - D3D11 doesn't do triangle fans.
	D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST,   // 6
	D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED,      // 7 - previously left to zero-initialization (UNDEFINED == 0).
};
|
||||
|
||||
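// Maps GE logic ops (by index) to D3D11_LOGIC_OP. Unlike in Vulkan, the D3D11 enum values are NOT in the same
// order as the GE ones, so this table is required once logic ops are wired up. (D3D11_LOGIC_OP lives in d3d11_1.h,
// where two of the enumerators below are actually spelled D3D11_LOGIC_OP_NOOP and D3D11_LOGIC_OP_EQUIV.)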
|
||||
/*
|
||||
static const D3D11_LOGIC_OP logicOps[] = {
|
||||
D3D11_LOGIC_OP_CLEAR,
|
||||
D3D11_LOGIC_OP_AND,
|
||||
D3D11_LOGIC_OP_AND_REVERSE,
|
||||
D3D11_LOGIC_OP_COPY,
|
||||
D3D11_LOGIC_OP_AND_INVERTED,
|
||||
D3D11_LOGIC_OP_NO_OP,
|
||||
D3D11_LOGIC_OP_XOR,
|
||||
D3D11_LOGIC_OP_OR,
|
||||
D3D11_LOGIC_OP_NOR,
|
||||
D3D11_LOGIC_OP_EQUIVALENT,
|
||||
D3D11_LOGIC_OP_INVERT,
|
||||
D3D11_LOGIC_OP_OR_REVERSE,
|
||||
D3D11_LOGIC_OP_COPY_INVERTED,
|
||||
D3D11_LOGIC_OP_OR_INVERTED,
|
||||
D3D11_LOGIC_OP_NAND,
|
||||
D3D11_LOGIC_OP_SET,
|
||||
};
|
||||
*/
|
||||
|
||||
// Placeholder: shader-based blending is not implemented in this backend yet.
// Always reports failure so the caller falls back to fixed-function blending.
static bool ApplyShaderBlending() {
	const bool applied = false;
	return applied;
}
|
||||
|
||||
// Placeholder counterpart of ApplyShaderBlending(): there is no shader blending state
// to tear down yet, so this intentionally does nothing.
static void ResetShaderBlending() {
}
|
||||
|
||||
class FramebufferManagerD3D11;
|
||||
class ShaderManagerD3D11;
|
||||
|
||||
// TODO: Do this more progressively. No need to compute the entire state if the entire state hasn't changed.
// Like the Vulkan backend's "pipeline key", we simply collect all the state together into keys here - we don't
// actually set any state (the caller is responsible for applying the keys and the remaining dynamic state, dynState).
|
||||
|
||||
// Packed description of the blend state; intended to be usable as a cache key for
// blend state objects. Field widths are sized for the raw D3D11 enum values.
struct D3D11BlendKey {
	unsigned blendEnable : 1;
	unsigned srcColor : 5;       // D3D11_BLEND (largest value is 19)
	unsigned destColor : 5;      // D3D11_BLEND
	unsigned srcAlpha : 5;       // D3D11_BLEND
	unsigned destAlpha : 5;      // D3D11_BLEND
	unsigned blendOpColor : 3;   // D3D11_BLEND_OP (1..5)
	unsigned blendOpAlpha : 3;   // D3D11_BLEND_OP (1..5)
	unsigned logicOpEnable : 1;
	unsigned logicOp : 4;        // D3D11_LOGIC_OP (0..15)
	unsigned colorWriteMask : 4; // one bit per channel: R=1, G=2, B=4, A=8
};
|
||||
|
||||
// Packed description of the depth/stencil state; intended to be usable as a cache key
// for depth-stencil state objects.
//
// The compare-op fields are 4 bits wide: D3D11_COMPARISON_FUNC values run from 1 (NEVER)
// to 8 (ALWAYS), so a 3-bit field would silently truncate D3D11_COMPARISON_ALWAYS to 0,
// which is not a valid comparison function. All fields together still fit in 32 bits.
struct D3D11DepthStencilKey {
	// Depth/Stencil
	unsigned int depthTestEnable : 1;
	unsigned int depthWriteEnable : 1;
	unsigned int depthCompareOp : 4;      // D3D11_COMPARISON_FUNC (1..8)
	unsigned int stencilTestEnable : 1;
	unsigned int stencilCompareOp : 4;    // D3D11_COMPARISON_FUNC (1..8)
	unsigned int stencilPassOp : 4;       // D3D11_STENCIL_OP (1..8)
	unsigned int stencilFailOp : 4;       // D3D11_STENCIL_OP (1..8)
	unsigned int stencilDepthFailOp : 4;  // D3D11_STENCIL_OP (1..8)
};
|
||||
|
||||
// Packed description of the rasterizer state; currently only the cull mode.
struct D3D11RasterKey {
	unsigned cullMode : 2;  // D3D11_CULL_MODE (NONE=1, FRONT=2, BACK=3)
};
|
||||
|
||||
// In D3D11 we cache blend state objects etc, and we simply emit keys, which are then also used to create these objects.
|
||||
struct D3D11StateKeys {
|
||||
D3D11BlendKey blend;
|
||||
D3D11DepthStencilKey depthStencil;
|
||||
D3D11RasterKey raster;
|
||||
};
|
||||
|
||||
struct D3D11DynamicState {
|
||||
int topology;
|
||||
bool useBlendColor;
|
||||
uint32_t blendColor;
|
||||
bool useStencil;
|
||||
uint8_t stencilRef;
|
||||
uint8_t stencilWriteMask;
|
||||
uint8_t stencilCompareMask;
|
||||
D3D11_VIEWPORT viewport;
|
||||
D3D11_RECT scissor;
|
||||
};
|
||||
|
||||
// Translates the current GE state (gstate / gstate_c) into D3D11 state keys and dynamic state.
//
// No D3D11 state is set here; the function only fills in its two output structures.
//
//   fbManager      - queried for render/target buffer sizes, and notified via SetDepthUpdated()
//                    when depth writes are enabled.
//   shaderManager  - currently unused.
//   prim           - GE primitive type, used to pick the topology and to disable culling for rectangles.
//   key            - output: zeroed, then filled with the blend, depth/stencil and raster keys.
//   dynState       - output: zeroed, then filled with topology, blend color, stencil values,
//                    viewport and scissor.
//
// Side effects: forces gstate_c.allowShaderBlend off and may mark DIRTY_SHADERBLEND,
// DIRTY_PROJMATRIX and DIRTY_DEPTHRANGE on gstate_c.
void ConvertStateToKeys(FramebufferManagerCommon *fbManager, ShaderManagerD3D11 *shaderManager, int prim, D3D11StateKeys &key, D3D11DynamicState &dynState) {
	memset(&key, 0, sizeof(key));
	memset(&dynState, 0, sizeof(dynState));
	// Unfortunately, this isn't implemented yet.
	gstate_c.allowShaderBlend = false;

	// Set blend - unless we need to do it in the shader.
	GenericBlendState blendState;
	ConvertBlendState(blendState, gstate_c.allowShaderBlend);

	bool useBufferedRendering = g_Config.iRenderingMode != FB_NON_BUFFERED_MODE;

	ViewportAndScissor vpAndScissor;
	ConvertViewportAndScissor(useBufferedRendering,
		fbManager->GetRenderWidth(), fbManager->GetRenderHeight(),
		fbManager->GetTargetBufferWidth(), fbManager->GetTargetBufferHeight(),
		vpAndScissor);

	if (blendState.applyShaderBlending) {
		if (ApplyShaderBlending()) {
			// We may still want to do something about stencil -> alpha.
			ApplyStencilReplaceAndLogicOp(blendState.replaceAlphaWithStencil, blendState);
		} else {
			// Until next time, force it off.
			ResetShaderBlending();
			gstate_c.allowShaderBlend = false;
		}
	} else if (blendState.resetShaderBlending) {
		ResetShaderBlending();
	}

	if (blendState.enabled) {
		key.blend.blendEnable = true;
		key.blend.blendOpColor = d3d11BlendEqLookup[(size_t)blendState.eqColor];
		key.blend.blendOpAlpha = d3d11BlendEqLookup[(size_t)blendState.eqAlpha];
		key.blend.srcColor = d3d11BlendFactorLookup[(size_t)blendState.srcColor];
		key.blend.srcAlpha = d3d11BlendFactorLookup[(size_t)blendState.srcAlpha];
		key.blend.destColor = d3d11BlendFactorLookup[(size_t)blendState.dstColor];
		key.blend.destAlpha = d3d11BlendFactorLookup[(size_t)blendState.dstAlpha];
		if (blendState.dirtyShaderBlend) {
			gstate_c.Dirty(DIRTY_SHADERBLEND);
		}
		dynState.useBlendColor = blendState.useBlendColor;
		if (blendState.useBlendColor) {
			dynState.blendColor = blendState.blendColor;
		}
	} else {
		key.blend.blendEnable = false;
		dynState.useBlendColor = false;
	}

	dynState.useStencil = false;

	// Set ColorMask/Stencil/Depth
	if (gstate.isModeClear()) {
		key.blend.logicOpEnable = false;
		key.raster.cullMode = D3D11_CULL_NONE;

		key.depthStencil.depthTestEnable = true;
		key.depthStencil.depthCompareOp = D3D11_COMPARISON_ALWAYS;
		key.depthStencil.depthWriteEnable = gstate.isClearModeDepthMask();
		if (gstate.isClearModeDepthMask()) {
			fbManager->SetDepthUpdated();
		}

		// Color Test
		bool colorMask = gstate.isClearModeColorMask();
		bool alphaMask = gstate.isClearModeAlphaMask();
		key.blend.colorWriteMask = (colorMask ? (1 | 2 | 4) : 0) | (alphaMask ? 8 : 0);

		// Stencil Test
		if (alphaMask) {
			key.depthStencil.stencilTestEnable = true;
			key.depthStencil.stencilCompareOp = D3D11_COMPARISON_ALWAYS;
			key.depthStencil.stencilPassOp = D3D11_STENCIL_OP_REPLACE;
			key.depthStencil.stencilFailOp = D3D11_STENCIL_OP_REPLACE;
			key.depthStencil.stencilDepthFailOp = D3D11_STENCIL_OP_REPLACE;
			dynState.useStencil = true;
			// In clear mode, the stencil value is set to the alpha value of the vertex.
			// A normal clear will be 2 points, the second point has the color.
			// We override this value in the pipeline from software transform for clear rectangles.
			dynState.stencilRef = 0xFF;
			dynState.stencilWriteMask = 0xFF;
		} else {
			key.depthStencil.stencilTestEnable = false;
			dynState.useStencil = false;
		}
	} else {
		if (gstate_c.Supports(GPU_SUPPORTS_LOGIC_OP)) {
			// Logic Ops
			if (gstate.isLogicOpEnabled() && gstate.getLogicOp() != GE_LOGIC_COPY) {
				key.blend.logicOpEnable = true;
				// NOTE(review): key.blend.logicOp is left at 0 here because the GE -> D3D11_LOGIC_OP
				// table is still commented out; it must be filled in before the key is consumed.
				// key.blend.logicOp = logicOps[gstate.getLogicOp()];
			} else {
				key.blend.logicOpEnable = false;
			}
		}

		// Set cull
		bool wantCull = !gstate.isModeThrough() && prim != GE_PRIM_RECTANGLES && gstate.isCullEnabled();
		key.raster.cullMode = wantCull ? (gstate.getCullMode() ? D3D11_CULL_FRONT : D3D11_CULL_BACK) : D3D11_CULL_NONE;

		// Depth Test
		if (gstate.isDepthTestEnabled()) {
			key.depthStencil.depthTestEnable = true;
			key.depthStencil.depthCompareOp = compareOps[gstate.getDepthTestFunction()];
			key.depthStencil.depthWriteEnable = gstate.isDepthWriteEnabled();
			if (gstate.isDepthWriteEnabled()) {
				fbManager->SetDepthUpdated();
			}
		} else {
			key.depthStencil.depthTestEnable = false;
			key.depthStencil.depthWriteEnable = false;
			key.depthStencil.depthCompareOp = D3D11_COMPARISON_ALWAYS;
		}

		// PSP color/alpha mask is per bit but we can only support per byte.
		// But let's do that, at least. And let's try a threshold.
		bool rmask = (gstate.pmskc & 0xFF) < 128;
		bool gmask = ((gstate.pmskc >> 8) & 0xFF) < 128;
		bool bmask = ((gstate.pmskc >> 16) & 0xFF) < 128;
		bool amask = (gstate.pmska & 0xFF) < 128;

#ifndef MOBILE_DEVICE
		u8 abits = (gstate.pmska >> 0) & 0xFF;
		u8 rbits = (gstate.pmskc >> 0) & 0xFF;
		u8 gbits = (gstate.pmskc >> 8) & 0xFF;
		u8 bbits = (gstate.pmskc >> 16) & 0xFF;
		if ((rbits != 0 && rbits != 0xFF) || (gbits != 0 && gbits != 0xFF) || (bbits != 0 && bbits != 0xFF)) {
			WARN_LOG_REPORT_ONCE(rgbmask, G3D, "Unsupported RGB mask: r=%02x g=%02x b=%02x", rbits, gbits, bbits);
		}
		if (abits != 0 && abits != 0xFF) {
			// The stencil part of the mask is supported.
			WARN_LOG_REPORT_ONCE(amask, G3D, "Unsupported alpha/stencil mask: %02x", abits);
		}
#endif

		// Let's not write to alpha if stencil isn't enabled.
		if (!gstate.isStencilTestEnabled()) {
			amask = false;
		} else {
			// If the stencil type is set to KEEP, we shouldn't write to the stencil/alpha channel.
			if (ReplaceAlphaWithStencilType() == STENCIL_VALUE_KEEP) {
				amask = false;
			}
		}

		key.blend.colorWriteMask = (rmask ? 1 : 0) | (gmask ? 2 : 0) | (bmask ? 4 : 0) | (amask ? 8 : 0);

		GenericStencilFuncState stencilState;
		ConvertStencilFuncState(stencilState);

		// Stencil Test
		if (stencilState.enabled) {
			key.depthStencil.stencilTestEnable = true;
			key.depthStencil.stencilCompareOp = compareOps[stencilState.testFunc];
			key.depthStencil.stencilPassOp = stencilOps[stencilState.zPass];
			key.depthStencil.stencilFailOp = stencilOps[stencilState.sFail];
			key.depthStencil.stencilDepthFailOp = stencilOps[stencilState.zFail];
			dynState.useStencil = true;
			dynState.stencilRef = stencilState.testRef;
			dynState.stencilCompareMask = stencilState.testMask;
			dynState.stencilWriteMask = stencilState.writeMask;
		} else {
			key.depthStencil.stencilTestEnable = false;
			dynState.useStencil = false;
		}
	}

	// Guard the table lookup: an out-of-range primitive type yields UNDEFINED instead of
	// reading past the end of primToD3D11.
	const size_t primCount = sizeof(primToD3D11) / sizeof(primToD3D11[0]);
	dynState.topology = (prim >= 0 && (size_t)prim < primCount) ? primToD3D11[prim] : D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED;

	// D3D11 requires the viewport depth range to stay within [0, 1], so clamp before storing it.
	// (Previously the clamped values were computed but never used.)
	float depthMin = vpAndScissor.depthRangeMin;
	float depthMax = vpAndScissor.depthRangeMax;
	if (depthMin < 0.0f) depthMin = 0.0f;
	if (depthMax > 1.0f) depthMax = 1.0f;

	D3D11_VIEWPORT &vp = dynState.viewport;
	vp.TopLeftX = vpAndScissor.viewportX;
	vp.TopLeftY = vpAndScissor.viewportY;
	vp.Width = vpAndScissor.viewportW;
	vp.Height = vpAndScissor.viewportH;
	vp.MinDepth = depthMin;
	vp.MaxDepth = depthMax;
	if (vpAndScissor.dirtyProj) {
		gstate_c.Dirty(DIRTY_PROJMATRIX);
	}

	// D3D11_RECT stores edges rather than a size, hence the x + w / y + h.
	D3D11_RECT &scissor = dynState.scissor;
	scissor.left = vpAndScissor.scissorX;
	scissor.top = vpAndScissor.scissorY;
	scissor.right = vpAndScissor.scissorX + vpAndScissor.scissorW;
	scissor.bottom = vpAndScissor.scissorY + vpAndScissor.scissorH;

	if (vpAndScissor.dirtyDepth) {
		gstate_c.Dirty(DIRTY_DEPTHRANGE);
	}
}
|
5
GPU/D3D11/VertexShaderGeneratorD3D11.cpp
Normal file
5
GPU/D3D11/VertexShaderGeneratorD3D11.cpp
Normal file
@ -0,0 +1,5 @@
|
||||
#include "GPU/D3D11/VertexShaderGeneratorD3D11.h"
|
||||
|
||||
// Stub for the D3D11 vertex shader generator; no shader code is emitted yet.
//
//   id            - shader ID describing the vertex shader to generate (currently unused).
//   buffer        - output: receives the generated source. Until generation is implemented
//                   it is set to the empty string so callers never read indeterminate data.
//                   Assumes buffer has room for at least one char when non-null.
//   usesLighting  - output: set to false until generation is implemented.
void GenerateVertexShaderD3D11(const ShaderID &id, char *buffer, bool *usesLighting) {
	// TODO: Generate HLSL for the given shader ID.
	if (buffer)
		buffer[0] = '\0';
	if (usesLighting)
		*usesLighting = false;
}
|
5
GPU/D3D11/VertexShaderGeneratorD3D11.h
Normal file
5
GPU/D3D11/VertexShaderGeneratorD3D11.h
Normal file
@ -0,0 +1,5 @@
|
||||
#pragma once
|
||||
|
||||
#include "GPU/Common/ShaderId.h"
|
||||
|
||||
void GenerateVertexShaderD3D11(const ShaderID &id, char *buffer, bool *usesLighting);
|
@ -17,8 +17,6 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "Globals.h"
|
||||
|
||||
#include "GPU/Common/ShaderId.h"
|
||||
|
||||
namespace DX9 {
|
||||
|
@ -83,7 +83,7 @@
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<AdditionalIncludeDirectories>../common;..;../ext;../ext/native;../ext/glew;../ext/snappy;</AdditionalIncludeDirectories>
|
||||
<AdditionalIncludeDirectories>..\dx9sdk\Include\DX11;../common;..;../ext;../ext/native;../ext/glew;../ext/snappy;</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>_CRTDBG_MAP_ALLOC;USING_WIN_UI;_CRT_SECURE_NO_WARNINGS;WIN32;_ARCH_32=1;_M_IX86=1;_DEBUG;_LIB;_UNICODE;UNICODE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<EnableEnhancedInstructionSet>StreamingSIMDExtensions2</EnableEnhancedInstructionSet>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
@ -105,7 +105,7 @@
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<AdditionalIncludeDirectories>../common;..;../ext;../ext/native;../ext/glew;../ext/snappy;</AdditionalIncludeDirectories>
|
||||
<AdditionalIncludeDirectories>..\dx9sdk\Include\DX11;../common;..;../ext;../ext/native;../ext/glew;../ext/snappy;</AdditionalIncludeDirectories>
|
||||
<EnableEnhancedInstructionSet>NotSet</EnableEnhancedInstructionSet>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
<OmitFramePointers>false</OmitFramePointers>
|
||||
@ -131,7 +131,7 @@
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<AdditionalIncludeDirectories>../common;..;../ext;../ext/native;../ext/glew;../ext/snappy;</AdditionalIncludeDirectories>
|
||||
<AdditionalIncludeDirectories>..\dx9sdk\Include\DX11;../common;..;../ext;../ext/native;../ext/glew;../ext/snappy;</AdditionalIncludeDirectories>
|
||||
<BufferSecurityCheck>false</BufferSecurityCheck>
|
||||
<EnableEnhancedInstructionSet>StreamingSIMDExtensions2</EnableEnhancedInstructionSet>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
@ -157,7 +157,7 @@
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<AdditionalIncludeDirectories>../common;..;../ext;../ext/native;../ext/glew;../ext/snappy;</AdditionalIncludeDirectories>
|
||||
<AdditionalIncludeDirectories>..\dx9sdk\Include\DX11;../common;..;../ext;../ext/native;../ext/glew;../ext/snappy;</AdditionalIncludeDirectories>
|
||||
<BufferSecurityCheck>false</BufferSecurityCheck>
|
||||
<EnableEnhancedInstructionSet>NotSet</EnableEnhancedInstructionSet>
|
||||
<FloatingPointModel>Fast</FloatingPointModel>
|
||||
@ -190,6 +190,7 @@
|
||||
<ClInclude Include="Common\PostShader.h" />
|
||||
<ClInclude Include="Common\ShaderCommon.h" />
|
||||
<ClInclude Include="Common\ShaderId.h" />
|
||||
<ClInclude Include="Common\ShaderUniforms.h" />
|
||||
<ClInclude Include="Common\SoftwareTransformCommon.h" />
|
||||
<ClInclude Include="Common\SplineCommon.h" />
|
||||
<ClInclude Include="Common\TextureDecoderNEON.h">
|
||||
@ -202,6 +203,9 @@
|
||||
<ClInclude Include="Common\TextureScalerCommon.h" />
|
||||
<ClInclude Include="Common\TransformCommon.h" />
|
||||
<ClInclude Include="Common\VertexDecoderCommon.h" />
|
||||
<ClInclude Include="D3D11\FragmentShaderGeneratorD3D11.h" />
|
||||
<ClInclude Include="D3D11\ShaderManagerD3D11.h" />
|
||||
<ClInclude Include="D3D11\VertexShaderGeneratorD3D11.h" />
|
||||
<ClInclude Include="Debugger\Breakpoints.h" />
|
||||
<ClInclude Include="Debugger\Stepping.h" />
|
||||
<ClInclude Include="Directx9\DepalettizeShaderDX9.h" />
|
||||
@ -261,6 +265,7 @@
|
||||
<ClCompile Include="Common\IndexGenerator.cpp" />
|
||||
<ClCompile Include="Common\PostShader.cpp" />
|
||||
<ClCompile Include="Common\ShaderId.cpp" />
|
||||
<ClCompile Include="Common\ShaderUniforms.cpp" />
|
||||
<ClCompile Include="Common\SplineCommon.cpp" />
|
||||
<ClCompile Include="Common\TextureDecoderNEON.cpp">
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
|
||||
@ -286,6 +291,10 @@
|
||||
</ClCompile>
|
||||
<ClCompile Include="Common\VertexDecoderCommon.cpp" />
|
||||
<ClCompile Include="Common\VertexDecoderX86.cpp" />
|
||||
<ClCompile Include="D3D11\FragmentShaderGeneratorD3D11.cpp" />
|
||||
<ClCompile Include="D3D11\ShaderManagerD3D11.cpp" />
|
||||
<ClCompile Include="D3D11\StateMappingD3D11.cpp" />
|
||||
<ClCompile Include="D3D11\VertexShaderGeneratorD3D11.cpp" />
|
||||
<ClCompile Include="Debugger\Breakpoints.cpp" />
|
||||
<ClCompile Include="Debugger\Stepping.cpp" />
|
||||
<ClCompile Include="Directx9\DepalettizeShaderDX9.cpp" />
|
||||
|
@ -25,6 +25,9 @@
|
||||
<Filter Include="Vulkan">
|
||||
<UniqueIdentifier>{3c621896-140c-4c8b-8e4d-a478bfdeca8a}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="D3D11">
|
||||
<UniqueIdentifier>{88eb5cea-ec25-4881-89da-02f9f2fa8f3f}</UniqueIdentifier>
|
||||
</Filter>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="ge_constants.h">
|
||||
@ -222,6 +225,18 @@
|
||||
<ClInclude Include="GLES\FragmentTestCacheGLES.h">
|
||||
<Filter>GLES</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="Common\ShaderUniforms.h">
|
||||
<Filter>Common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="D3D11\ShaderManagerD3D11.h">
|
||||
<Filter>D3D11</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="D3D11\VertexShaderGeneratorD3D11.h">
|
||||
<Filter>D3D11</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="D3D11\FragmentShaderGeneratorD3D11.h">
|
||||
<Filter>D3D11</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="Math3D.cpp">
|
||||
@ -428,5 +443,20 @@
|
||||
<ClCompile Include="GLES\FragmentTestCacheGLES.cpp">
|
||||
<Filter>GLES</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="D3D11\StateMappingD3D11.cpp">
|
||||
<Filter>D3D11</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="Common\ShaderUniforms.cpp">
|
||||
<Filter>Common</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="D3D11\ShaderManagerD3D11.cpp">
|
||||
<Filter>D3D11</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="D3D11\VertexShaderGeneratorD3D11.cpp">
|
||||
<Filter>D3D11</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="D3D11\FragmentShaderGeneratorD3D11.cpp">
|
||||
<Filter>D3D11</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
</Project>
|
@ -150,12 +150,6 @@ std::string VulkanVertexShader::GetShaderString(DebugShaderStringType type) cons
|
||||
}
|
||||
}
|
||||
|
||||
static void ConvertProjMatrixToVulkan(Matrix4x4 &in, bool invertedX, bool invertedY) {
|
||||
const Vec3 trans(0, 0, gstate_c.vpZOffset * 0.5f + 0.5f);
|
||||
const Vec3 scale(gstate_c.vpWidthScale, gstate_c.vpHeightScale, gstate_c.vpDepthScale * 0.5f);
|
||||
in.translateAndScale(trans, scale);
|
||||
}
|
||||
|
||||
ShaderManagerVulkan::ShaderManagerVulkan(VulkanContext *vulkan)
|
||||
: vulkan_(vulkan), lastVShader_(nullptr), lastFShader_(nullptr) {
|
||||
codeBuffer_ = new char[16384];
|
||||
@ -187,207 +181,6 @@ uint32_t ShaderManagerVulkan::PushBoneBuffer(VulkanPushBuffer *dest, VkBuffer *b
|
||||
return dest->PushAligned(&ub_bones, sizeof(ub_bones), uboAlignment_, buf);
|
||||
}
|
||||
|
||||
void ShaderManagerVulkan::BaseUpdateUniforms(uint64_t dirtyUniforms) {
|
||||
if (dirtyUniforms & DIRTY_TEXENV) {
|
||||
Uint8x3ToFloat4(ub_base.texEnvColor, gstate.texenvcolor);
|
||||
}
|
||||
if (dirtyUniforms & DIRTY_ALPHACOLORREF) {
|
||||
Uint8x3ToInt4_Alpha(ub_base.alphaColorRef, gstate.getColorTestRef(), gstate.getAlphaTestRef() & gstate.getAlphaTestMask());
|
||||
}
|
||||
if (dirtyUniforms & DIRTY_ALPHACOLORMASK) {
|
||||
Uint8x3ToInt4_Alpha(ub_base.colorTestMask, gstate.getColorTestMask(), gstate.getAlphaTestMask());
|
||||
}
|
||||
if (dirtyUniforms & DIRTY_FOGCOLOR) {
|
||||
Uint8x3ToFloat4(ub_base.fogColor, gstate.fogcolor);
|
||||
}
|
||||
if (dirtyUniforms & DIRTY_SHADERBLEND) {
|
||||
Uint8x3ToFloat4(ub_base.blendFixA, gstate.getFixA());
|
||||
Uint8x3ToFloat4(ub_base.blendFixB, gstate.getFixB());
|
||||
}
|
||||
if (dirtyUniforms & DIRTY_TEXCLAMP) {
|
||||
const float invW = 1.0f / (float)gstate_c.curTextureWidth;
|
||||
const float invH = 1.0f / (float)gstate_c.curTextureHeight;
|
||||
const int w = gstate.getTextureWidth(0);
|
||||
const int h = gstate.getTextureHeight(0);
|
||||
const float widthFactor = (float)w * invW;
|
||||
const float heightFactor = (float)h * invH;
|
||||
|
||||
// First wrap xy, then half texel xy (for clamp.)
|
||||
ub_base.texClamp[0] = widthFactor;
|
||||
ub_base.texClamp[1] = heightFactor;
|
||||
ub_base.texClamp[2] = invW * 0.5f;
|
||||
ub_base.texClamp[3] = invH * 0.5f;
|
||||
ub_base.texClampOffset[0] = gstate_c.curTextureXOffset * invW;
|
||||
ub_base.texClampOffset[1] = gstate_c.curTextureYOffset * invH;
|
||||
}
|
||||
|
||||
if (dirtyUniforms & DIRTY_PROJMATRIX) {
|
||||
Matrix4x4 flippedMatrix;
|
||||
memcpy(&flippedMatrix, gstate.projMatrix, 16 * sizeof(float));
|
||||
|
||||
const bool invertedY = gstate_c.vpHeight < 0;
|
||||
if (invertedY) {
|
||||
flippedMatrix[1] = -flippedMatrix[1];
|
||||
flippedMatrix[5] = -flippedMatrix[5];
|
||||
flippedMatrix[9] = -flippedMatrix[9];
|
||||
flippedMatrix[13] = -flippedMatrix[13];
|
||||
}
|
||||
const bool invertedX = gstate_c.vpWidth < 0;
|
||||
if (invertedX) {
|
||||
flippedMatrix[0] = -flippedMatrix[0];
|
||||
flippedMatrix[4] = -flippedMatrix[4];
|
||||
flippedMatrix[8] = -flippedMatrix[8];
|
||||
flippedMatrix[12] = -flippedMatrix[12];
|
||||
}
|
||||
ConvertProjMatrixToVulkan(flippedMatrix, invertedX, invertedY);
|
||||
CopyMatrix4x4(ub_base.proj, flippedMatrix.getReadPtr());
|
||||
}
|
||||
|
||||
if (dirtyUniforms & DIRTY_PROJTHROUGHMATRIX) {
|
||||
Matrix4x4 proj_through;
|
||||
proj_through.setOrthoVulkan(0.0f, gstate_c.curRTWidth, 0, gstate_c.curRTHeight, 0, 1);
|
||||
CopyMatrix4x4(ub_base.proj_through, proj_through.getReadPtr());
|
||||
}
|
||||
|
||||
// Transform
|
||||
if (dirtyUniforms & DIRTY_WORLDMATRIX) {
|
||||
ConvertMatrix4x3To4x4(ub_base.world, gstate.worldMatrix);
|
||||
}
|
||||
if (dirtyUniforms & DIRTY_VIEWMATRIX) {
|
||||
ConvertMatrix4x3To4x4(ub_base.view, gstate.viewMatrix);
|
||||
}
|
||||
if (dirtyUniforms & DIRTY_TEXMATRIX) {
|
||||
ConvertMatrix4x3To4x4(ub_base.tex, gstate.tgenMatrix);
|
||||
}
|
||||
|
||||
// Combined two small uniforms
|
||||
if (dirtyUniforms & (DIRTY_FOGCOEF | DIRTY_STENCILREPLACEVALUE)) {
|
||||
float fogcoef_stencil[3] = {
|
||||
getFloat24(gstate.fog1),
|
||||
getFloat24(gstate.fog2),
|
||||
(float)gstate.getStencilTestRef()
|
||||
};
|
||||
if (my_isinf(fogcoef_stencil[1])) {
|
||||
// not really sure what a sensible value might be.
|
||||
fogcoef_stencil[1] = fogcoef_stencil[1] < 0.0f ? -10000.0f : 10000.0f;
|
||||
} else if (my_isnan(fogcoef_stencil[1])) {
|
||||
// Workaround for https://github.com/hrydgard/ppsspp/issues/5384#issuecomment-38365988
|
||||
// Just put the fog far away at a large finite distance.
|
||||
// Infinities and NaNs are rather unpredictable in shaders on many GPUs
|
||||
// so it's best to just make it a sane calculation.
|
||||
fogcoef_stencil[0] = 100000.0f;
|
||||
fogcoef_stencil[1] = 1.0f;
|
||||
}
|
||||
#ifndef MOBILE_DEVICE
|
||||
else if (my_isnanorinf(fogcoef_stencil[1]) || my_isnanorinf(fogcoef_stencil[0])) {
|
||||
ERROR_LOG_REPORT_ONCE(fognan, G3D, "Unhandled fog NaN/INF combo: %f %f", fogcoef_stencil[0], fogcoef_stencil[1]);
|
||||
}
|
||||
#endif
|
||||
CopyFloat3(ub_base.fogCoef_stencil, fogcoef_stencil);
|
||||
}
|
||||
|
||||
// Note - this one is not in lighting but in transformCommon as it has uses beyond lighting
|
||||
if (dirtyUniforms & DIRTY_MATAMBIENTALPHA) {
|
||||
Uint8x3ToFloat4_AlphaUint8(ub_base.matAmbient, gstate.materialambient, gstate.getMaterialAmbientA());
|
||||
}
|
||||
|
||||
// Texturing
|
||||
if (dirtyUniforms & DIRTY_UVSCALEOFFSET) {
|
||||
const float invW = 1.0f / (float)gstate_c.curTextureWidth;
|
||||
const float invH = 1.0f / (float)gstate_c.curTextureHeight;
|
||||
const int w = gstate.getTextureWidth(0);
|
||||
const int h = gstate.getTextureHeight(0);
|
||||
const float widthFactor = (float)w * invW;
|
||||
const float heightFactor = (float)h * invH;
|
||||
ub_base.uvScaleOffset[0] = widthFactor;
|
||||
ub_base.uvScaleOffset[1] = heightFactor;
|
||||
ub_base.uvScaleOffset[2] = 0.0f;
|
||||
ub_base.uvScaleOffset[3] = 0.0f;
|
||||
}
|
||||
|
||||
if (dirtyUniforms & DIRTY_DEPTHRANGE) {
|
||||
float viewZScale = gstate.getViewportZScale();
|
||||
float viewZCenter = gstate.getViewportZCenter();
|
||||
float viewZInvScale;
|
||||
|
||||
// We had to scale and translate Z to account for our clamped Z range.
|
||||
// Therefore, we also need to reverse this to round properly.
|
||||
//
|
||||
// Example: scale = 65535.0, center = 0.0
|
||||
// Resulting range = -65535 to 65535, clamped to [0, 65535]
|
||||
// gstate_c.vpDepthScale = 2.0f
|
||||
// gstate_c.vpZOffset = -1.0f
|
||||
//
|
||||
// The projection already accounts for those, so we need to reverse them.
|
||||
//
|
||||
// Additionally, D3D9 uses a range from [0, 1]. We double and move the center.
|
||||
viewZScale *= (1.0f / gstate_c.vpDepthScale) * 2.0f;
|
||||
viewZCenter -= 65535.0f * gstate_c.vpZOffset + 32768.5f;
|
||||
|
||||
if (viewZScale != 0.0) {
|
||||
viewZInvScale = 1.0f / viewZScale;
|
||||
} else {
|
||||
viewZInvScale = 0.0;
|
||||
}
|
||||
|
||||
ub_base.depthRange[0] = viewZScale;
|
||||
ub_base.depthRange[1] = viewZCenter;
|
||||
ub_base.depthRange[2] = viewZCenter;
|
||||
ub_base.depthRange[3] = viewZInvScale;
|
||||
}
|
||||
}
|
||||
|
||||
void ShaderManagerVulkan::LightUpdateUniforms(uint64_t dirtyUniforms) {
|
||||
// Lighting
|
||||
if (dirtyUniforms & DIRTY_AMBIENT) {
|
||||
Uint8x3ToFloat4_AlphaUint8(ub_lights.ambientColor, gstate.ambientcolor, gstate.getAmbientA());
|
||||
}
|
||||
if (dirtyUniforms & DIRTY_MATDIFFUSE) {
|
||||
Uint8x3ToFloat4(ub_lights.materialDiffuse, gstate.materialdiffuse);
|
||||
}
|
||||
if (dirtyUniforms & DIRTY_MATEMISSIVE) {
|
||||
Uint8x3ToFloat4(ub_lights.materialEmissive, gstate.materialemissive);
|
||||
}
|
||||
if (dirtyUniforms & DIRTY_MATSPECULAR) {
|
||||
Uint8x3ToFloat4_Alpha(ub_lights.materialSpecular, gstate.materialspecular, getFloat24(gstate.materialspecularcoef));
|
||||
}
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if (dirtyUniforms & (DIRTY_LIGHT0 << i)) {
|
||||
if (gstate.isDirectionalLight(i)) {
|
||||
// Prenormalize
|
||||
float x = getFloat24(gstate.lpos[i * 3 + 0]);
|
||||
float y = getFloat24(gstate.lpos[i * 3 + 1]);
|
||||
float z = getFloat24(gstate.lpos[i * 3 + 2]);
|
||||
float len = sqrtf(x*x + y*y + z*z);
|
||||
if (len == 0.0f)
|
||||
len = 1.0f;
|
||||
else
|
||||
len = 1.0f / len;
|
||||
float vec[3] = { x * len, y * len, z * len };
|
||||
CopyFloat3To4(ub_lights.lpos[i], vec);
|
||||
} else {
|
||||
ExpandFloat24x3ToFloat4(ub_lights.lpos[i], &gstate.lpos[i * 3]);
|
||||
}
|
||||
ExpandFloat24x3ToFloat4(ub_lights.ldir[i], &gstate.ldir[i * 3]);
|
||||
ExpandFloat24x3ToFloat4(ub_lights.latt[i], &gstate.latt[i * 3]);
|
||||
CopyFloat1To4(ub_lights.lightAngle[i], getFloat24(gstate.lcutoff[i]));
|
||||
CopyFloat1To4(ub_lights.lightSpotCoef[i], getFloat24(gstate.lconv[i]));
|
||||
Uint8x3ToFloat4(ub_lights.lightAmbient[i], gstate.lcolor[i * 3]);
|
||||
Uint8x3ToFloat4(ub_lights.lightDiffuse[i], gstate.lcolor[i * 3 + 1]);
|
||||
Uint8x3ToFloat4(ub_lights.lightSpecular[i], gstate.lcolor[i * 3 + 2]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ShaderManagerVulkan::BoneUpdateUniforms(uint64_t dirtyUniforms) {
|
||||
for (int i = 0; i < 8; i++) {
|
||||
if (dirtyUniforms & (DIRTY_BONEMATRIX0 << i)) {
|
||||
ConvertMatrix4x3To4x4(ub_bones.bones[i], gstate.boneMatrix + 12 * i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ShaderManagerVulkan::DeviceRestore(VulkanContext *vulkan) {
|
||||
vulkan_ = vulkan;
|
||||
uboAlignment_ = vulkan_->GetPhysicalDeviceProperties().limits.minUniformBufferOffsetAlignment;
|
||||
@ -429,11 +222,11 @@ uint64_t ShaderManagerVulkan::UpdateUniforms() {
|
||||
uint64_t dirty = gstate_c.GetDirtyUniforms();
|
||||
if (dirty != 0) {
|
||||
if (dirty & DIRTY_BASE_UNIFORMS)
|
||||
BaseUpdateUniforms(dirty);
|
||||
BaseUpdateUniforms(&ub_base, dirty);
|
||||
if (dirty & DIRTY_LIGHT_UNIFORMS)
|
||||
LightUpdateUniforms(dirty);
|
||||
LightUpdateUniforms(&ub_lights, dirty);
|
||||
if (dirty & DIRTY_BONE_UNIFORMS)
|
||||
BoneUpdateUniforms(dirty);
|
||||
BoneUpdateUniforms(&ub_bones, dirty);
|
||||
}
|
||||
gstate_c.CleanUniforms();
|
||||
return dirty;
|
||||
|
@ -27,104 +27,7 @@
|
||||
#include "GPU/Vulkan/FragmentShaderGeneratorVulkan.h"
|
||||
#include "GPU/Vulkan/VulkanUtil.h"
|
||||
#include "math/lin/matrix4x4.h"
|
||||
|
||||
void ConvertProjMatrixToVulkan(Matrix4x4 & in);
|
||||
|
||||
// Pretty much full. Will need more bits for more fine grained dirty tracking for lights.
|
||||
enum : uint64_t {
|
||||
DIRTY_BASE_UNIFORMS =
|
||||
DIRTY_WORLDMATRIX | DIRTY_PROJTHROUGHMATRIX | DIRTY_VIEWMATRIX | DIRTY_TEXMATRIX | DIRTY_ALPHACOLORREF |
|
||||
DIRTY_PROJMATRIX | DIRTY_FOGCOLOR | DIRTY_FOGCOEF | DIRTY_TEXENV | DIRTY_STENCILREPLACEVALUE |
|
||||
DIRTY_ALPHACOLORMASK | DIRTY_SHADERBLEND | DIRTY_UVSCALEOFFSET | DIRTY_TEXCLAMP | DIRTY_DEPTHRANGE | DIRTY_MATAMBIENTALPHA,
|
||||
DIRTY_LIGHT_UNIFORMS =
|
||||
DIRTY_LIGHT0 | DIRTY_LIGHT1 | DIRTY_LIGHT2 | DIRTY_LIGHT3 |
|
||||
DIRTY_MATDIFFUSE | DIRTY_MATSPECULAR | DIRTY_MATEMISSIVE | DIRTY_AMBIENT,
|
||||
};
|
||||
|
||||
// TODO: Split into two structs, one for software transform and one for hardware transform, to save space.
|
||||
// 512 bytes. Probably can't get to 256 (nVidia's UBO alignment).
|
||||
struct UB_VS_FS_Base {
|
||||
float proj[16];
|
||||
float proj_through[16];
|
||||
float view[16];
|
||||
float world[16];
|
||||
float tex[16]; // not that common, may want to break out
|
||||
float uvScaleOffset[4];
|
||||
float depthRange[4];
|
||||
float fogCoef_stencil[4];
|
||||
float matAmbient[4];
|
||||
// Fragment data
|
||||
float fogColor[4];
|
||||
float texEnvColor[4];
|
||||
int alphaColorRef[4];
|
||||
int colorTestMask[4];
|
||||
float blendFixA[4];
|
||||
float blendFixB[4];
|
||||
float texClamp[4];
|
||||
float texClampOffset[4];
|
||||
};
|
||||
|
||||
// GLSL member declarations mirroring UB_VS_FS_Base, listed in the same order as the struct's fields.
// Contains only the member list, not the enclosing uniform block declaration.
// NOTE(review): presumably spliced into a uniform block by the shader generators - confirm.
static const char *ub_baseStr =
|
||||
R"( mat4 proj_mtx;
|
||||
mat4 proj_through_mtx;
|
||||
mat4 view_mtx;
|
||||
mat4 world_mtx;
|
||||
mat4 tex_mtx;
|
||||
vec4 uvscaleoffset;
|
||||
vec4 depthRange;
|
||||
vec3 fogcoef_stencilreplace;
|
||||
vec4 matambientalpha;
|
||||
vec3 fogcolor;
|
||||
vec3 texenv;
|
||||
ivec4 alphacolorref;
|
||||
ivec4 alphacolormask;
|
||||
vec3 blendFixA;
|
||||
vec3 blendFixB;
|
||||
vec4 texclamp;
|
||||
vec2 texclampoff;
|
||||
)";
|
||||
|
||||
// 576 bytes. Can we get down to 512?
// CPU-side copy of the lighting uniform data. Size breakdown (with 4-byte float):
// four 4-float colors (64 bytes) plus eight [4][4] arrays (8 * 64 = 512 bytes).
// Each [4][4] array holds one 4-float row per light, for four lights.
// The fields appear in the same order as the members declared in ub_vs_lightsStr; keep the two in sync.
// NOTE(review): rows are 4 floats wide even where ub_vs_lightsStr declares vec3 or float elements,
// presumably to match the 16-byte array stride of the uniform block layout - confirm.
|
||||
struct UB_VS_Lights {
|
||||
float ambientColor[4];  // vec4 globalAmbient in ub_vs_lightsStr
|
||||
float materialDiffuse[4];
|
||||
float materialSpecular[4];
|
||||
float materialEmissive[4];
|
||||
float lpos[4][4];
|
||||
float ldir[4][4];
|
||||
float latt[4][4];
|
||||
float lightAngle[4][4]; // TODO: Merge with lightSpotCoef, use .xy
|
||||
float lightSpotCoef[4][4];
|
||||
float lightAmbient[4][4];
|
||||
float lightDiffuse[4][4];
|
||||
float lightSpecular[4][4];
|
||||
};
|
||||
|
||||
// GLSL member declarations mirroring UB_VS_Lights, listed in the same order as the struct's fields.
// Contains only the member list, not the enclosing uniform block declaration.
static const char *ub_vs_lightsStr =
|
||||
R"( vec4 globalAmbient;
|
||||
vec3 matdiffuse;
|
||||
vec4 matspecular;
|
||||
vec3 matemissive;
|
||||
vec3 pos[4];
|
||||
vec3 dir[4];
|
||||
vec3 att[4];
|
||||
float angle[4];
|
||||
float spotCoef[4];
|
||||
vec3 ambient[4];
|
||||
vec3 diffuse[4];
|
||||
vec3 specular[4];
|
||||
)";
|
||||
|
||||
// With some cleverness, we could get away with uploading just half this when only the four first
|
||||
// bones are being used. This is 512b, 256b would be great.
|
||||
// Could also move to 4x3 matrices - would let us fit 5 bones into 256b.
// CPU-side copy of the bone matrix uniform data: eight matrices of 16 floats each
// (8 * 64 = 512 bytes with 4-byte float), declared on the GLSL side as "mat4 m[8]" in ub_vs_bonesStr.
|
||||
struct UB_VS_Bones {
|
||||
float bones[8][16];
|
||||
};
|
||||
|
||||
// GLSL member declaration mirroring UB_VS_Bones: an array of eight mat4, matching bones[8][16].
// Contains only the member list, not the enclosing uniform block declaration.
static const char *ub_vs_bonesStr =
|
||||
R"( mat4 m[8];
|
||||
)";
|
||||
#include "GPU/Common/ShaderUniforms.h"
|
||||
|
||||
class VulkanContext;
|
||||
class VulkanPushBuffer;
|
||||
@ -215,10 +118,6 @@ public:
|
||||
uint32_t PushBoneBuffer(VulkanPushBuffer *dest, VkBuffer *buf);
|
||||
|
||||
private:
|
||||
void BaseUpdateUniforms(uint64_t dirtyUniforms);
|
||||
void LightUpdateUniforms(uint64_t dirtyUniforms);
|
||||
void BoneUpdateUniforms(uint64_t dirtyUniforms);
|
||||
|
||||
void Clear();
|
||||
|
||||
VulkanContext *vulkan_;
|
||||
|
Loading…
Reference in New Issue
Block a user