Start stubbing out a new D3D11 backend

This commit is contained in:
Henrik Rydgard 2017-02-08 17:35:41 +01:00
parent 9dd3e18ed4
commit 175b97ef34
15 changed files with 1234 additions and 324 deletions

View File

@ -1,5 +1,7 @@
#pragma once
#include <string>
#include <cstdint>
#include "base/basictypes.h"
// TODO: There will be additional bits, indicating that groups of these will be
@ -93,9 +95,9 @@ struct ShaderID {
}
}
u32 d[2];
uint32_t d[2];
bool operator < (const ShaderID &other) const {
for (size_t i = 0; i < sizeof(d) / sizeof(u32); i++) {
for (size_t i = 0; i < sizeof(d) / sizeof(uint32_t); i++) {
if (d[i] < other.d[i])
return true;
if (d[i] > other.d[i])
@ -104,7 +106,7 @@ struct ShaderID {
return false;
}
bool operator == (const ShaderID &other) const {
for (size_t i = 0; i < sizeof(d) / sizeof(u32); i++) {
for (size_t i = 0; i < sizeof(d) / sizeof(uint32_t); i++) {
if (d[i] != other.d[i])
return false;
}
@ -142,7 +144,7 @@ struct ShaderID {
bool CanUseHardwareTransform(int prim);
void ComputeVertexShaderID(ShaderID *id, u32 vertexType, bool useHWTransform);
void ComputeVertexShaderID(ShaderID *id, uint32_t vertexType, bool useHWTransform);
// Generates a compact string that describes the shader. Useful in a list to get an overview
// of the current flora of shaders.
std::string VertexShaderDesc(const ShaderID &id);

View File

@ -0,0 +1,215 @@
#include "ShaderUniforms.h"
#include "math/dataconv.h"
#include "math/lin/matrix4x4.h"
#include "math/math_util.h"
#include "math/lin/vec3.h"
#include "GPU/GPUState.h"
#include "GPU/Math3D.h"
#include "Core/Reporting.h"
// Adjusts a projection matrix in place with a translate-and-scale derived from the
// cached viewport state in gstate_c (width/height/depth scale and Z offset).
// invertedX / invertedY are accepted but not consulted by this function.
static void ConvertProjMatrixToVulkan(Matrix4x4 &in, bool invertedX, bool invertedY) {
	const float zTranslate = gstate_c.vpZOffset * 0.5f + 0.5f;
	const float zScale = gstate_c.vpDepthScale * 0.5f;

	const Vec3 trans(0, 0, zTranslate);
	const Vec3 scale(gstate_c.vpWidthScale, gstate_c.vpHeightScale, zScale);
	in.translateAndScale(trans, scale);
}
// Refreshes the "base" uniform block from the current GE state.
//
// ub            - destination block; only the fields whose dirty bit is set are rewritten.
// dirtyUniforms - bitmask of DIRTY_* flags selecting which groups of fields to refresh.
//
// All inputs other than the two parameters come from the globals gstate and gstate_c.
void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms) {
	if (dirtyUniforms & DIRTY_TEXENV) {
		Uint8x3ToFloat4(ub->texEnvColor, gstate.texenvcolor);
	}
	if (dirtyUniforms & DIRTY_ALPHACOLORREF) {
		// The alpha reference is pre-masked with the alpha test mask before upload.
		Uint8x3ToInt4_Alpha(ub->alphaColorRef, gstate.getColorTestRef(), gstate.getAlphaTestRef() & gstate.getAlphaTestMask());
	}
	if (dirtyUniforms & DIRTY_ALPHACOLORMASK) {
		Uint8x3ToInt4_Alpha(ub->colorTestMask, gstate.getColorTestMask(), gstate.getAlphaTestMask());
	}
	if (dirtyUniforms & DIRTY_FOGCOLOR) {
		Uint8x3ToFloat4(ub->fogColor, gstate.fogcolor);
	}
	if (dirtyUniforms & DIRTY_SHADERBLEND) {
		Uint8x3ToFloat4(ub->blendFixA, gstate.getFixA());
		Uint8x3ToFloat4(ub->blendFixB, gstate.getFixB());
	}
	if (dirtyUniforms & DIRTY_TEXCLAMP) {
		// NOTE(review): no guard against curTextureWidth/Height being 0 here - confirm callers ensure that.
		const float invW = 1.0f / (float)gstate_c.curTextureWidth;
		const float invH = 1.0f / (float)gstate_c.curTextureHeight;
		const int w = gstate.getTextureWidth(0);
		const int h = gstate.getTextureHeight(0);
		const float widthFactor = (float)w * invW;
		const float heightFactor = (float)h * invH;
		// First wrap xy, then half texel xy (for clamp.)
		ub->texClamp[0] = widthFactor;
		ub->texClamp[1] = heightFactor;
		ub->texClamp[2] = invW * 0.5f;
		ub->texClamp[3] = invH * 0.5f;
		// Only the first two components of texClampOffset are written.
		ub->texClampOffset[0] = gstate_c.curTextureXOffset * invW;
		ub->texClampOffset[1] = gstate_c.curTextureYOffset * invH;
	}
	if (dirtyUniforms & DIRTY_PROJMATRIX) {
		// Work on a copy so gstate.projMatrix itself is never modified.
		Matrix4x4 flippedMatrix;
		memcpy(&flippedMatrix, gstate.projMatrix, 16 * sizeof(float));
		// A negative viewport height negates matrix elements 1, 5, 9 and 13.
		const bool invertedY = gstate_c.vpHeight < 0;
		if (invertedY) {
			flippedMatrix[1] = -flippedMatrix[1];
			flippedMatrix[5] = -flippedMatrix[5];
			flippedMatrix[9] = -flippedMatrix[9];
			flippedMatrix[13] = -flippedMatrix[13];
		}
		// A negative viewport width negates matrix elements 0, 4, 8 and 12.
		const bool invertedX = gstate_c.vpWidth < 0;
		if (invertedX) {
			flippedMatrix[0] = -flippedMatrix[0];
			flippedMatrix[4] = -flippedMatrix[4];
			flippedMatrix[8] = -flippedMatrix[8];
			flippedMatrix[12] = -flippedMatrix[12];
		}
		ConvertProjMatrixToVulkan(flippedMatrix, invertedX, invertedY);
		CopyMatrix4x4(ub->proj, flippedMatrix.getReadPtr());
	}
	if (dirtyUniforms & DIRTY_PROJTHROUGHMATRIX) {
		// Orthographic projection spanning the current render target size.
		Matrix4x4 proj_through;
		proj_through.setOrthoVulkan(0.0f, gstate_c.curRTWidth, 0, gstate_c.curRTHeight, 0, 1);
		CopyMatrix4x4(ub->proj_through, proj_through.getReadPtr());
	}
	// Transform
	if (dirtyUniforms & DIRTY_WORLDMATRIX) {
		ConvertMatrix4x3To4x4(ub->world, gstate.worldMatrix);
	}
	if (dirtyUniforms & DIRTY_VIEWMATRIX) {
		ConvertMatrix4x3To4x4(ub->view, gstate.viewMatrix);
	}
	if (dirtyUniforms & DIRTY_TEXMATRIX) {
		ConvertMatrix4x3To4x4(ub->tex, gstate.tgenMatrix);
	}
	// Combined two small uniforms
	// Either dirty bit rewrites all three values: fog coefficients in [0]/[1], stencil ref in [2].
	if (dirtyUniforms & (DIRTY_FOGCOEF | DIRTY_STENCILREPLACEVALUE)) {
		float fogcoef_stencil[3] = {
			getFloat24(gstate.fog1),
			getFloat24(gstate.fog2),
			(float)gstate.getStencilTestRef()
		};
		if (my_isinf(fogcoef_stencil[1])) {
			// not really sure what a sensible value might be.
			fogcoef_stencil[1] = fogcoef_stencil[1] < 0.0f ? -10000.0f : 10000.0f;
		} else if (my_isnan(fogcoef_stencil[1])) {
			// Workaround for https://github.com/hrydgard/ppsspp/issues/5384#issuecomment-38365988
			// Just put the fog far away at a large finite distance.
			// Infinities and NaNs are rather unpredictable in shaders on many GPUs
			// so it's best to just make it a sane calculation.
			fogcoef_stencil[0] = 100000.0f;
			fogcoef_stencil[1] = 1.0f;
		}
#ifndef MOBILE_DEVICE
		// Reached only when [1] is finite, so in practice this reports a NaN/INF in [0].
		else if (my_isnanorinf(fogcoef_stencil[1]) || my_isnanorinf(fogcoef_stencil[0])) {
			ERROR_LOG_REPORT_ONCE(fognan, G3D, "Unhandled fog NaN/INF combo: %f %f", fogcoef_stencil[0], fogcoef_stencil[1]);
		}
#endif
		CopyFloat3(ub->fogCoef_stencil, fogcoef_stencil);
	}
	// Note - this one is not in lighting but in transformCommon as it has uses beyond lighting
	if (dirtyUniforms & DIRTY_MATAMBIENTALPHA) {
		Uint8x3ToFloat4_AlphaUint8(ub->matAmbient, gstate.materialambient, gstate.getMaterialAmbientA());
	}
	// Texturing
	if (dirtyUniforms & DIRTY_UVSCALEOFFSET) {
		const float invW = 1.0f / (float)gstate_c.curTextureWidth;
		const float invH = 1.0f / (float)gstate_c.curTextureHeight;
		const int w = gstate.getTextureWidth(0);
		const int h = gstate.getTextureHeight(0);
		const float widthFactor = (float)w * invW;
		const float heightFactor = (float)h * invH;
		// Scale in xy; the offset half (zw) is always written as zero here.
		ub->uvScaleOffset[0] = widthFactor;
		ub->uvScaleOffset[1] = heightFactor;
		ub->uvScaleOffset[2] = 0.0f;
		ub->uvScaleOffset[3] = 0.0f;
	}
	if (dirtyUniforms & DIRTY_DEPTHRANGE) {
		float viewZScale = gstate.getViewportZScale();
		float viewZCenter = gstate.getViewportZCenter();
		float viewZInvScale;
		// We had to scale and translate Z to account for our clamped Z range.
		// Therefore, we also need to reverse this to round properly.
		//
		// Example: scale = 65535.0, center = 0.0
		// Resulting range = -65535 to 65535, clamped to [0, 65535]
		// gstate_c.vpDepthScale = 2.0f
		// gstate_c.vpZOffset = -1.0f
		//
		// The projection already accounts for those, so we need to reverse them.
		//
		// Additionally, D3D9 uses a range from [0, 1]. We double and move the center.
		viewZScale *= (1.0f / gstate_c.vpDepthScale) * 2.0f;
		viewZCenter -= 65535.0f * gstate_c.vpZOffset + 32768.5f;
		// Avoid dividing by zero: a zero scale yields a zero inverse scale.
		if (viewZScale != 0.0) {
			viewZInvScale = 1.0f / viewZScale;
		} else {
			viewZInvScale = 0.0;
		}
		// Components 1 and 2 both carry the adjusted center.
		ub->depthRange[0] = viewZScale;
		ub->depthRange[1] = viewZCenter;
		ub->depthRange[2] = viewZCenter;
		ub->depthRange[3] = viewZInvScale;
	}
}
// Refreshes the lighting uniform block from the current GE state.
//
// ub            - destination block; only fields whose dirty bit is set are rewritten.
// dirtyUniforms - bitmask of DIRTY_* flags (ambient, material colors, DIRTY_LIGHT0..3).
void LightUpdateUniforms(UB_VS_Lights *ub, uint64_t dirtyUniforms) {
	// Global ambient and material colors.
	if (dirtyUniforms & DIRTY_AMBIENT)
		Uint8x3ToFloat4_AlphaUint8(ub->ambientColor, gstate.ambientcolor, gstate.getAmbientA());
	if (dirtyUniforms & DIRTY_MATDIFFUSE)
		Uint8x3ToFloat4(ub->materialDiffuse, gstate.materialdiffuse);
	if (dirtyUniforms & DIRTY_MATEMISSIVE)
		Uint8x3ToFloat4(ub->materialEmissive, gstate.materialemissive);
	if (dirtyUniforms & DIRTY_MATSPECULAR)
		Uint8x3ToFloat4_Alpha(ub->materialSpecular, gstate.materialspecular, getFloat24(gstate.materialspecularcoef));

	// Per-light state: each of the four lights has its own dirty bit.
	for (int light = 0; light < 4; ++light) {
		if (!(dirtyUniforms & (DIRTY_LIGHT0 << light)))
			continue;

		// Position, direction, attenuation and colors are stored three entries per light.
		const int base = light * 3;

		if (gstate.isDirectionalLight(light)) {
			// Directional lights are uploaded pre-normalized; a zero vector is left as zero.
			const float px = getFloat24(gstate.lpos[base + 0]);
			const float py = getFloat24(gstate.lpos[base + 1]);
			const float pz = getFloat24(gstate.lpos[base + 2]);
			const float length = sqrtf(px*px + py*py + pz*pz);
			const float invLength = (length == 0.0f) ? 1.0f : 1.0f / length;
			float normalized[3] = { px * invLength, py * invLength, pz * invLength };
			CopyFloat3To4(ub->lpos[light], normalized);
		} else {
			ExpandFloat24x3ToFloat4(ub->lpos[light], &gstate.lpos[base]);
		}

		ExpandFloat24x3ToFloat4(ub->ldir[light], &gstate.ldir[base]);
		ExpandFloat24x3ToFloat4(ub->latt[light], &gstate.latt[base]);
		CopyFloat1To4(ub->lightAngle[light], getFloat24(gstate.lcutoff[light]));
		CopyFloat1To4(ub->lightSpotCoef[light], getFloat24(gstate.lconv[light]));
		Uint8x3ToFloat4(ub->lightAmbient[light], gstate.lcolor[base]);
		Uint8x3ToFloat4(ub->lightDiffuse[light], gstate.lcolor[base + 1]);
		Uint8x3ToFloat4(ub->lightSpecular[light], gstate.lcolor[base + 2]);
	}
}
// Refreshes the bone matrices whose DIRTY_BONEMATRIXn bit is set in dirtyUniforms.
// Each source bone is 12 floats in gstate.boneMatrix and is expanded to a 4x4 matrix.
void BoneUpdateUniforms(UB_VS_Bones *ub, uint64_t dirtyUniforms) {
	for (int bone = 0; bone < 8; ++bone) {
		const bool boneDirty = (dirtyUniforms & (DIRTY_BONEMATRIX0 << bone)) != 0;
		if (!boneDirty)
			continue;
		ConvertMatrix4x3To4x4(ub->bones[bone], gstate.boneMatrix + 12 * bone);
	}
}

147
GPU/Common/ShaderUniforms.h Normal file
View File

@ -0,0 +1,147 @@
#pragma once
#include <cstdint>
#include "ShaderCommon.h"
// Used by the "modern" backends that use uniform buffers. They can share this without issue.
// Pretty much full. Will need more bits for more fine grained dirty tracking for lights.
// Aggregate dirty masks: each one ORs together the individual DIRTY_* bits that feed
// one uniform block, so a single test tells whether that block needs refreshing.
enum : uint64_t {
	// Every bit that BaseUpdateUniforms() reacts to (UB_VS_FS_Base).
	DIRTY_BASE_UNIFORMS =
		DIRTY_WORLDMATRIX | DIRTY_PROJTHROUGHMATRIX | DIRTY_VIEWMATRIX | DIRTY_TEXMATRIX | DIRTY_ALPHACOLORREF |
		DIRTY_PROJMATRIX | DIRTY_FOGCOLOR | DIRTY_FOGCOEF | DIRTY_TEXENV | DIRTY_STENCILREPLACEVALUE |
		DIRTY_ALPHACOLORMASK | DIRTY_SHADERBLEND | DIRTY_UVSCALEOFFSET | DIRTY_TEXCLAMP | DIRTY_DEPTHRANGE | DIRTY_MATAMBIENTALPHA,
	// Every bit that LightUpdateUniforms() reacts to (UB_VS_Lights).
	DIRTY_LIGHT_UNIFORMS =
		DIRTY_LIGHT0 | DIRTY_LIGHT1 | DIRTY_LIGHT2 | DIRTY_LIGHT3 |
		DIRTY_MATDIFFUSE | DIRTY_MATSPECULAR | DIRTY_MATEMISSIVE | DIRTY_AMBIENT,
};
// TODO: Split into two structs, one for software transform and one for hardware transform, to save space.
// 512 bytes. Probably can't get to 256 (nVidia's UBO alignment).
// CPU-side mirror of the base uniform/constant buffer, filled in by BaseUpdateUniforms().
// Every member is a multiple of 16 bytes so the fields line up with the shader-side
// declarations in ub_baseStr / cb_baseStr, where each member starts on a 16-byte boundary.
struct UB_VS_FS_Base {
	float proj[16];
	float proj_through[16];
	float view[16];
	float world[16];
	float tex[16];  // not that common, may want to break out
	float uvScaleOffset[4];
	float depthRange[4];
	float fogCoef_stencil[4];  // xy: fog coefficients, z: stencil reference; w is not written
	float matAmbient[4];
	// Fragment data
	float fogColor[4];
	float texEnvColor[4];
	int alphaColorRef[4];
	int colorTestMask[4];
	float blendFixA[4];
	float blendFixB[4];
	float texClamp[4];
	float texClampOffset[4];  // only xy is written
};

// The shader-side layout depends on this exact size; fail the build if a field is
// added or resized without updating the shader declarations.
static_assert(sizeof(UB_VS_FS_Base) == 512, "UB_VS_FS_Base must stay 512 bytes to match the shader-side layout");
// Member declarations using GLSL-style type names (mat4/vec4/ivec4), listed in the same
// order as the fields of UB_VS_FS_Base.
// NOTE(review): presumably spliced into a uniform block by a shader generator - the
// consumer is not visible in this file.
static const char *ub_baseStr =
R"( mat4 proj_mtx;
mat4 proj_through_mtx;
mat4 view_mtx;
mat4 world_mtx;
mat4 tex_mtx;
vec4 uvscaleoffset;
vec4 depthRange;
vec3 fogcoef_stencilreplace;
vec4 matambientalpha;
vec3 fogcolor;
vec3 texenv;
ivec4 alphacolorref;
ivec4 alphacolormask;
vec3 blendFixA;
vec3 blendFixB;
vec4 texclamp;
vec2 texclampoff;
)";
// Member declarations using HLSL type names (matrix/float4/int4), listed in the same
// order as the fields of UB_VS_FS_Base.
// alphacolorref / alphacolormask are int[4] on the C++ side, so they are declared as
// int4 here ("ifloat4" is not an HLSL type and would not compile).
// NOTE(review): presumably spliced into a cbuffer by the D3D11 shader generators - the
// consumer is not visible in this file.
static const char *cb_baseStr =
R"( matrix proj_mtx;
matrix proj_through_mtx;
matrix view_mtx;
matrix world_mtx;
matrix tex_mtx;
float4 uvscaleoffset;
float4 depthRange;
float3 fogcoef_stencilreplace;
float4 matambientalpha;
float3 fogcolor;
float3 texenv;
int4 alphacolorref;
int4 alphacolormask;
float3 blendFixA;
float3 blendFixB;
float4 texclamp;
float2 texclampoff;
)";
// 576 bytes. Can we get down to 512?
// CPU-side mirror of the lighting uniform/constant buffer, filled in by LightUpdateUniforms().
// Every entry is padded to four floats so each array element starts on a 16-byte boundary,
// matching the shader-side declarations in ub_vs_lightsStr / cb_vs_lightsStr.
struct UB_VS_Lights {
	float ambientColor[4];
	float materialDiffuse[4];
	float materialSpecular[4];
	float materialEmissive[4];
	// One row per light (four lights).
	float lpos[4][4];
	float ldir[4][4];
	float latt[4][4];
	float lightAngle[4][4];  // TODO: Merge with lightSpotCoef, use .xy
	float lightSpotCoef[4][4];
	float lightAmbient[4][4];
	float lightDiffuse[4][4];
	float lightSpecular[4][4];
};

// The shader-side layout depends on this exact size; fail the build if it drifts.
static_assert(sizeof(UB_VS_Lights) == 576, "UB_VS_Lights must stay 576 bytes to match the shader-side layout");
// Member declarations using GLSL-style type names, listed in the same order as the
// fields of UB_VS_Lights.
// NOTE(review): presumably spliced into a uniform block by a shader generator - the
// consumer is not visible in this file.
static const char *ub_vs_lightsStr =
R"( vec4 globalAmbient;
vec3 matdiffuse;
vec4 matspecular;
vec3 matemissive;
vec3 pos[4];
vec3 dir[4];
vec3 att[4];
float angle[4];
float spotCoef[4];
vec3 ambient[4];
vec3 diffuse[4];
vec3 specular[4];
)";
// Member declarations using HLSL type names, listed in the same order as the fields of
// UB_VS_Lights.
// NOTE(review): presumably spliced into a cbuffer by the D3D11 shader generators - the
// consumer is not visible in this file.
static const char *cb_vs_lightsStr =
R"( float4 globalAmbient;
float3 matdiffuse;
float4 matspecular;
float3 matemissive;
float3 pos[4];
float3 dir[4];
float3 att[4];
float angle[4];
float spotCoef[4];
float3 ambient[4];
float3 diffuse[4];
float3 specular[4];
)";
// With some cleverness, we could get away with uploading just half this when only the four first
// bones are being used. This is 512b, 256b would be great.
// Could also move to 4x3 matrices - would let us fit 5 bones into 256b.
// CPU-side mirror of the bone uniform/constant buffer: eight 4x4 float matrices,
// filled in by BoneUpdateUniforms().
struct UB_VS_Bones {
	float bones[8][16];
};

// The shader side declares eight 4x4 matrices; fail the build if this size drifts.
static_assert(sizeof(UB_VS_Bones) == 512, "UB_VS_Bones must stay 512 bytes to match the shader-side layout");
// GLSL-style declaration of the eight bone matrices held in UB_VS_Bones.
static const char *ub_vs_bonesStr =
R"( mat4 m[8];
)";
// HLSL declaration of the eight bone matrices held in UB_VS_Bones.
static const char *cb_vs_bonesStr =
R"( matrix m[8];
)";
void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms);
void LightUpdateUniforms(UB_VS_Lights *ub, uint64_t dirtyUniforms);
void BoneUpdateUniforms(UB_VS_Bones *ub, uint64_t dirtyUniforms);

View File

@ -0,0 +1,5 @@
#include "GPU/D3D11/FragmentShaderGeneratorD3D11.h"
// Stub for the D3D11 fragment shader generator: no shader text is produced yet.
//
// id     - shader ID describing the fragment shader to generate (currently unused).
// buffer - destination for the generated source; receives an empty C string.
void GenerateFragmentShaderD3D11(const ShaderID &id, char *buffer) {
	// Callers treat buffer as a NUL-terminated string (ShaderManagerD3D11 passes it
	// straight to the shader constructor, which copies it into a std::string), and the
	// buffer comes from an uninitialized new char[]. Terminate it so that read is defined.
	if (buffer)
		buffer[0] = '\0';
}

View File

@ -0,0 +1,5 @@
#pragma once
#include "GPU/Common/ShaderId.h"
void GenerateFragmentShaderD3D11(const ShaderID &id, char *buffer);

View File

@ -0,0 +1,273 @@
// Copyright (c) 2015- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#ifdef _WIN32
#define SHADERLOG
#endif
#include <d3d11.h>
#include <d3dcompiler.h>
#include <map>
#include "base/logging.h"
#include "math/lin/matrix4x4.h"
#include "math/math_util.h"
#include "math/dataconv.h"
#include "util/text/utf8.h"
#include "thin3d/d3d11_loader.h"
#include "Common/Common.h"
#include "Core/Config.h"
#include "Core/Reporting.h"
#include "GPU/Math3D.h"
#include "GPU/GPUState.h"
#include "GPU/ge_constants.h"
#include "GPU/D3D11/ShaderManagerD3D11.h"
#include "GPU/D3D11/FragmentShaderGeneratorD3D11.h"
#include "GPU/D3D11/VertexShaderGeneratorD3D11.h"
// Builds a pixel shader wrapper from generated source.
//
// device         - device used to create the shader object.
// id             - shader ID this shader was generated for.
// code           - NUL-terminated shader source; copied into source_.
// useHWTransform - recorded and reported by UseHWTransform().
//
// On any failure module_ stays null and Failed() returns true.
D3D11FragmentShader::D3D11FragmentShader(ID3D11Device *device, ShaderID id, const char *code, bool useHWTransform)
	: module_(nullptr), device_(device), failed_(false), useHWTransform_(useHWTransform), id_(id) {
	source_ = code;
#ifdef SHADERLOG
	OutputDebugStringA(code);
#endif

	// TODO: Compile `code` to bytecode. Until that step exists there is nothing valid to
	// give the device, so never pass indeterminate pointer/size values to CreatePixelShader.
	const void *bytecode = nullptr;
	SIZE_T bytecodeSize = 0;
	if (bytecode == nullptr || bytecodeSize == 0) {
		failed_ = true;
		return;
	}

	HRESULT hr = device_->CreatePixelShader(bytecode, bytecodeSize, nullptr, &module_);
	if (FAILED(hr)) {
		failed_ = true;
		return;
	}
}
// Drops this wrapper's reference on the pixel shader, if one was created.
D3D11FragmentShader::~D3D11FragmentShader() {
	if (module_ != nullptr) {
		module_->Release();
	}
}
// Returns a debug string for this shader: the stored source, a short description
// derived from the shader ID, or "N/A" for any other request type.
std::string D3D11FragmentShader::GetShaderString(DebugShaderStringType type) const {
	if (type == SHADER_STRING_SOURCE_CODE) {
		return source_;
	}
	if (type == SHADER_STRING_SHORT_DESC) {
		return FragmentShaderDesc(id_);
	}
	return "N/A";
}
// Builds a vertex shader wrapper from generated source.
//
// device         - device used to create the shader object.
// id             - shader ID this shader was generated for.
// code           - NUL-terminated shader source; copied into source_.
// vertType       - accepted for interface compatibility; not used here.
// useHWTransform - recorded and reported by UseHWTransform().
// usesLighting   - recorded and reported by HasLights().
//
// On any failure module_ stays null and Failed() returns true.
D3D11VertexShader::D3D11VertexShader(ID3D11Device *device, ShaderID id, const char *code, int vertType, bool useHWTransform, bool usesLighting)
	: module_(nullptr), device_(device), failed_(false), useHWTransform_(useHWTransform), usesLighting_(usesLighting), id_(id) {
	source_ = code;
#ifdef SHADERLOG
	OutputDebugStringA(code);
#endif

	// TODO: Compile `code` to bytecode. Until that step exists there is nothing valid to
	// give the device, so never pass indeterminate pointer/size values to CreateVertexShader.
	const void *bytecode = nullptr;
	SIZE_T bytecodeSize = 0;
	if (bytecode == nullptr || bytecodeSize == 0) {
		failed_ = true;
		return;
	}

	HRESULT hr = device_->CreateVertexShader(bytecode, bytecodeSize, nullptr, &module_);
	if (FAILED(hr)) {
		failed_ = true;
		return;
	}
}
// Drops this wrapper's reference on the vertex shader, if one was created.
D3D11VertexShader::~D3D11VertexShader() {
	if (module_ != nullptr) {
		module_->Release();
	}
}
// Returns a debug string for this shader: the stored source, a short description
// derived from the shader ID, or "N/A" for any other request type.
std::string D3D11VertexShader::GetShaderString(DebugShaderStringType type) const {
	if (type == SHADER_STRING_SOURCE_CODE) {
		return source_;
	}
	if (type == SHADER_STRING_SHORT_DESC) {
		return VertexShaderDesc(id_);
	}
	return "N/A";
}
// Sets up an empty shader manager: allocates the 16 KB scratch buffer that receives
// generated shader source, zeroes the three uniform scratch blocks and logs their sizes.
ShaderManagerD3D11::ShaderManagerD3D11(ID3D11Device *device, ID3D11DeviceContext *context)
	: device_(device), context_(context), codeBuffer_(new char[16384]), lastFShader_(nullptr), lastVShader_(nullptr) {
	// Start every uniform block from all-zero contents.
	memset(&ub_base, 0, sizeof(ub_base));
	memset(&ub_lights, 0, sizeof(ub_lights));
	memset(&ub_bones, 0, sizeof(ub_bones));

	const int baseSize = (int)sizeof(ub_base);
	const int lightsSize = (int)sizeof(ub_lights);
	const int bonesSize = (int)sizeof(ub_bones);
	ILOG("sizeof(ub_base): %d", baseSize);
	ILOG("sizeof(ub_lights): %d", lightsSize);
	ILOG("sizeof(ub_bones): %d", bonesSize);
}
// Destroys every cached shader, then frees the source scratch buffer.
ShaderManagerD3D11::~ShaderManagerD3D11() {
	// Cached shaders go first; the scratch buffer is released last.
	ClearShaders();

	delete[] codeBuffer_;
}
// Deletes every cached fragment and vertex shader, empties both caches and forgets
// the last-used shader IDs.
void ShaderManagerD3D11::Clear() {
	for (auto &entry : fsCache_) {
		delete entry.second;
	}
	for (auto &entry : vsCache_) {
		delete entry.second;
	}

	fsCache_.clear();
	vsCache_.clear();

	lastFSID_.clear();
	lastVSID_.clear();
}
// Throws away all cached shaders and marks every uniform dirty so the next draw
// rebuilds shader and uniform state from scratch.
void ShaderManagerD3D11::ClearShaders() {
	// Free the caches and drop the "last used" bookkeeping.
	Clear();
	DirtyShader();

	// Nothing previously uploaded can be trusted any more.
	gstate_c.Dirty(DIRTY_ALL_UNIFORMS);
}
// Forgets which shaders were used last, so the next GetShaders() call goes through
// the full cache lookup.
void ShaderManagerD3D11::DirtyShader() {
	// Reset the remembered pointers first, then the remembered IDs.
	lastFShader_ = nullptr;
	lastVShader_ = nullptr;

	lastVSID_.clear();
	lastFSID_.clear();
}
// Clears only the remembered shader pointers; the remembered IDs are left alone.
// (Original note on this function: "disables vertex arrays".)
void ShaderManagerD3D11::DirtyLastShader() {
	lastFShader_ = nullptr;
	lastVShader_ = nullptr;
}
// Brings the three uniform scratch blocks up to date with the pending dirty bits,
// clears those bits, and returns the mask that was pending on entry.
uint64_t ShaderManagerD3D11::UpdateUniforms() {
	const uint64_t pending = gstate_c.GetDirtyUniforms();

	// Each block is only touched when at least one of its bits is pending;
	// a zero mask therefore skips all three.
	if (pending & DIRTY_BASE_UNIFORMS) {
		BaseUpdateUniforms(&ub_base, pending);
	}
	if (pending & DIRTY_LIGHT_UNIFORMS) {
		LightUpdateUniforms(&ub_lights, pending);
	}
	if (pending & DIRTY_BONE_UNIFORMS) {
		BoneUpdateUniforms(&ub_bones, pending);
	}

	gstate_c.CleanUniforms();
	return pending;
}
void ShaderManagerD3D11::GetShaders(int prim, u32 vertType, D3D11VertexShader **vshader, D3D11FragmentShader **fshader, bool useHWTransform) {
ShaderID VSID;
ShaderID FSID;
ComputeVertexShaderID(&VSID, vertType, useHWTransform);
ComputeFragmentShaderID(&FSID);
// Just update uniforms if this is the same shader as last time.
if (lastVShader_ != nullptr && lastFShader_ != nullptr && VSID == lastVSID_ && FSID == lastFSID_) {
*vshader = lastVShader_;
*fshader = lastFShader_;
// Already all set, no need to look up in shader maps.
return;
}
VSCache::iterator vsIter = vsCache_.find(VSID);
D3D11VertexShader *vs;
if (vsIter == vsCache_.end()) {
// Vertex shader not in cache. Let's compile it.
bool usesLighting;
GenerateVertexShaderD3D11(VSID, codeBuffer_, &usesLighting);
vs = new D3D11VertexShader(device_, VSID, codeBuffer_, vertType, useHWTransform, usesLighting);
vsCache_[VSID] = vs;
} else {
vs = vsIter->second;
}
lastVSID_ = VSID;
FSCache::iterator fsIter = fsCache_.find(FSID);
D3D11FragmentShader *fs;
if (fsIter == fsCache_.end()) {
// Fragment shader not in cache. Let's compile it.
GenerateFragmentShaderD3D11(FSID, codeBuffer_);
fs = new D3D11FragmentShader(device_, FSID, codeBuffer_, useHWTransform);
fsCache_[FSID] = fs;
} else {
fs = fsIter->second;
}
lastFSID_ = FSID;
lastVShader_ = vs;
lastFShader_ = fs;
*vshader = vs;
*fshader = fs;
}
// Returns the string form of every cached shader ID of the requested type.
// Any type other than vertex or fragment yields an empty list.
std::vector<std::string> ShaderManagerD3D11::DebugGetShaderIDs(DebugShaderType type) {
	std::string text;
	std::vector<std::string> result;

	if (type == SHADER_TYPE_VERTEX) {
		for (const auto &entry : vsCache_) {
			entry.first.ToString(&text);
			result.push_back(text);
		}
	} else if (type == SHADER_TYPE_FRAGMENT) {
		for (const auto &entry : fsCache_) {
			entry.first.ToString(&text);
			result.push_back(text);
		}
	}

	return result;
}
std::string ShaderManagerD3D11::DebugGetShaderString(std::string id, DebugShaderType type, DebugShaderStringType stringType) {
ShaderID shaderId;
shaderId.FromString(id);
switch (type) {
case SHADER_TYPE_VERTEX:
{
auto iter = vsCache_.find(shaderId);
if (iter == vsCache_.end()) {
return "";
}
return iter->second->GetShaderString(stringType);
}
case SHADER_TYPE_FRAGMENT:
{
auto iter = fsCache_.find(shaderId);
if (iter == fsCache_.end()) {
return "";
}
return iter->second->GetShaderString(stringType);
}
default:
return "N/A";
}
}

View File

@ -0,0 +1,145 @@
// Copyright (c) 2017- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#pragma once
#include <map>
#include <d3d11.h>
#include "base/basictypes.h"
#include "Globals.h"
#include "GPU/Common/ShaderCommon.h"
#include "GPU/Common/ShaderId.h"
// #include "GPU/DX9/VertexShaderGeneratorD3D11.h"
// #include "GPU/DX9/FragmentShaderGeneratorD3D11.h"
#include "math/lin/matrix4x4.h"
#include "GPU/Common/ShaderUniforms.h"
class D3D11Context;
class D3D11PushBuffer;
// Wraps one D3D11 pixel shader together with the source text and ShaderID it was
// created from.
//
// The object holds a reference on the ID3D11PixelShader and releases it in its
// destructor, so it is non-copyable: a copy would release the same COM object twice.
class D3D11FragmentShader {
public:
	D3D11FragmentShader(ID3D11Device *device, ShaderID id, const char *code, bool useHWTransform);
	~D3D11FragmentShader();

	D3D11FragmentShader(const D3D11FragmentShader &) = delete;
	D3D11FragmentShader &operator=(const D3D11FragmentShader &) = delete;

	// Shader source this object was constructed with.
	const std::string &source() const { return source_; }

	// True when the shader object could not be created.
	bool Failed() const { return failed_; }
	bool UseHWTransform() const { return useHWTransform_; }

	// Debug text for this shader; see DebugShaderStringType.
	std::string GetShaderString(DebugShaderStringType type) const;

	// Underlying D3D11 shader object (null if creation failed).
	ID3D11PixelShader *GetShader() const { return module_; }

protected:
	ID3D11PixelShader *module_;
	ID3D11Device *device_;
	std::string source_;
	bool failed_;
	bool useHWTransform_;
	ShaderID id_;
};
// Wraps one D3D11 vertex shader together with the source text and ShaderID it was
// created from.
//
// The object holds a reference on the ID3D11VertexShader and releases it in its
// destructor, so it is non-copyable: a copy would release the same COM object twice.
class D3D11VertexShader {
public:
	D3D11VertexShader(ID3D11Device *device, ShaderID id, const char *code, int vertType, bool useHWTransform, bool usesLighting);
	~D3D11VertexShader();

	D3D11VertexShader(const D3D11VertexShader &) = delete;
	D3D11VertexShader &operator=(const D3D11VertexShader &) = delete;

	// Shader source this object was constructed with.
	const std::string &source() const { return source_; }

	// True when the shader object could not be created.
	bool Failed() const { return failed_; }
	bool UseHWTransform() const { return useHWTransform_; }

	// Whether the shader ID has the bone-skinning bit set.
	bool HasBones() const {
		return id_.Bit(VS_BIT_ENABLE_BONES);
	}
	// Lighting flag recorded at construction time.
	bool HasLights() const {
		return usesLighting_;
	}

	// Debug text for this shader; see DebugShaderStringType.
	std::string GetShaderString(DebugShaderStringType type) const;

	// Underlying D3D11 shader object (null if creation failed).
	ID3D11VertexShader *GetModule() const { return module_; }

protected:
	ID3D11VertexShader *module_;
	ID3D11Device *device_;
	std::string source_;
	bool failed_;
	bool useHWTransform_;
	bool usesLighting_;
	ShaderID id_;
};
class D3D11PushBuffer;
// Caches D3D11 vertex/fragment shaders by ShaderID and keeps CPU-side copies of the
// base, light and bone uniform blocks.
//
// The manager owns the cached shader objects and the source scratch buffer through raw
// pointers and frees them in its destructor, so it is non-copyable: a copy would delete
// the same shaders and buffer twice.
class ShaderManagerD3D11 : public ShaderManagerCommon {
public:
	ShaderManagerD3D11(ID3D11Device *device, ID3D11DeviceContext *context);
	~ShaderManagerD3D11();

	ShaderManagerD3D11(const ShaderManagerD3D11 &) = delete;
	ShaderManagerD3D11 &operator=(const ShaderManagerD3D11 &) = delete;

	// Returns (creating and caching if needed) the shader pair for the current state.
	void GetShaders(int prim, u32 vertType, D3D11VertexShader **vshader, D3D11FragmentShader **fshader, bool useHWTransform);

	// Deletes all cached shaders and marks all uniforms dirty.
	void ClearShaders();
	// Forgets the last-used shader pointers and IDs.
	void DirtyShader();
	// Forgets only the last-used shader pointers.
	void DirtyLastShader();

	int GetNumVertexShaders() const { return (int)vsCache_.size(); }
	int GetNumFragmentShaders() const { return (int)fsCache_.size(); }

	std::vector<std::string> DebugGetShaderIDs(DebugShaderType type);
	std::string DebugGetShaderString(std::string id, DebugShaderType type, DebugShaderStringType stringType);

	// Refreshes the uniform scratch blocks for all pending dirty bits and returns that mask.
	uint64_t UpdateUniforms();

	// TODO: Avoid copying these buffers if same as last draw, can still point to it assuming we're still in the same pushbuffer.
	// Applies dirty changes and copies the buffer.
	// For now every block is unconditionally reported as dirty.
	bool IsBaseDirty() { return true; }
	bool IsLightDirty() { return true; }
	bool IsBoneDirty() { return true; }

	/*
	uint32_t PushBaseBuffer(D3D11PushBuffer *dest, VkBuffer *buf);
	uint32_t PushLightBuffer(D3D11PushBuffer *dest, VkBuffer *buf);
	uint32_t PushBoneBuffer(D3D11PushBuffer *dest, VkBuffer *buf);
	*/

private:
	// Deletes cached shaders and resets the last-used IDs.
	void Clear();

	ID3D11Device *device_;
	ID3D11DeviceContext *context_;

	typedef std::map<ShaderID, D3D11FragmentShader *> FSCache;
	FSCache fsCache_;

	typedef std::map<ShaderID, D3D11VertexShader *> VSCache;
	VSCache vsCache_;

	// Scratch buffer that receives generated shader source; owned by this object.
	char *codeBuffer_;

	// Uniform block scratchpad. These (the relevant ones) are copied to the current pushbuffer at draw time.
	UB_VS_FS_Base ub_base;
	UB_VS_Lights ub_lights;
	UB_VS_Bones ub_bones;

	// Most recently returned shader pair and its IDs, used as a fast path in GetShaders().
	D3D11FragmentShader *lastFShader_;
	D3D11VertexShader *lastVShader_;

	ShaderID lastFSID_;
	ShaderID lastVSID_;
};

View File

@ -0,0 +1,379 @@
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include <d3d11.h>
#include "GPU/Math3D.h"
#include "GPU/GPUState.h"
#include "GPU/ge_constants.h"
#include "GPU/Common/GPUStateUtils.h"
#include "Core/System.h"
#include "Core/Config.h"
#include "Core/Reporting.h"
#include "GPU/Common/FramebufferCommon.h"
// These tables all fit into u8s.
// Maps a generic BlendFactor value (used as the index) to the D3D11 blend factor.
// The entry order must match the BlendFactor enum, which is declared elsewhere.
// NOTE(review): the repeated BLEND_FACTOR / INV_BLEND_FACTOR pair presumably covers
// separate "constant color" and "constant alpha" factors - confirm against BlendFactor.
static const D3D11_BLEND d3d11BlendFactorLookup[(size_t)BlendFactor::COUNT] = {
	D3D11_BLEND_ZERO,
	D3D11_BLEND_ONE,
	D3D11_BLEND_SRC_COLOR,
	D3D11_BLEND_INV_SRC_COLOR,
	D3D11_BLEND_DEST_COLOR,
	D3D11_BLEND_INV_DEST_COLOR,
	D3D11_BLEND_SRC_ALPHA,
	D3D11_BLEND_INV_SRC_ALPHA,
	D3D11_BLEND_DEST_ALPHA,
	D3D11_BLEND_INV_DEST_ALPHA,
	D3D11_BLEND_BLEND_FACTOR,
	D3D11_BLEND_INV_BLEND_FACTOR,
	D3D11_BLEND_BLEND_FACTOR,
	D3D11_BLEND_INV_BLEND_FACTOR,
	D3D11_BLEND_SRC1_COLOR,
	D3D11_BLEND_INV_SRC1_COLOR,
	D3D11_BLEND_SRC1_ALPHA,
	D3D11_BLEND_INV_SRC1_ALPHA,
};
// Maps a generic BlendEq value (used as the index) to the D3D11 blend operation.
// The entry order must match the BlendEq enum, which is declared elsewhere.
static const D3D11_BLEND_OP d3d11BlendEqLookup[(size_t)BlendEq::COUNT] = {
	D3D11_BLEND_OP_ADD,
	D3D11_BLEND_OP_SUBTRACT,
	D3D11_BLEND_OP_REV_SUBTRACT,
	D3D11_BLEND_OP_MIN,
	D3D11_BLEND_OP_MAX,
};
// Two-entry cull mode table.
// NOTE(review): not referenced in the visible part of this file - ConvertStateToKeys
// picks the cull mode inline, and with the opposite pairing (a non-zero GE cull mode
// selects FRONT there). Confirm whether this table is still needed and which order is right.
static const D3D11_CULL_MODE cullingMode[] = {
	D3D11_CULL_BACK,
	D3D11_CULL_FRONT,
};
// Maps a GE comparison function (used as the index, e.g. gstate.getDepthTestFunction())
// to the D3D11 comparison function. Eight entries, so the index must be in 0..7.
static const D3D11_COMPARISON_FUNC compareOps[] = {
	D3D11_COMPARISON_NEVER,
	D3D11_COMPARISON_ALWAYS,
	D3D11_COMPARISON_EQUAL,
	D3D11_COMPARISON_NOT_EQUAL,
	D3D11_COMPARISON_LESS,
	D3D11_COMPARISON_LESS_EQUAL,
	D3D11_COMPARISON_GREATER,
	D3D11_COMPARISON_GREATER_EQUAL,
};
// Stencil operation table with eight entries; the last two slots are reserved and
// fall back to KEEP.
// NOTE(review): presumably indexed by the GE stencil op value - the lookup site is not
// in the visible part of this file.
static const D3D11_STENCIL_OP stencilOps[] = {
	D3D11_STENCIL_OP_KEEP,
	D3D11_STENCIL_OP_ZERO,
	D3D11_STENCIL_OP_REPLACE,
	D3D11_STENCIL_OP_INVERT,
	D3D11_STENCIL_OP_INCR_SAT,
	D3D11_STENCIL_OP_DECR_SAT,
	D3D11_STENCIL_OP_KEEP, // reserved
	D3D11_STENCIL_OP_KEEP, // reserved
};
// Primitive topology table with eight slots.
// Only seven initializers are given, so the eighth slot is zero-initialized.
// NOTE(review): presumably indexed by the GE primitive type, with the final
// TRIANGLELIST entry standing in for rectangles - confirm at the lookup site.
static const D3D11_PRIMITIVE_TOPOLOGY primToD3D11[8] = {
	D3D11_PRIMITIVE_TOPOLOGY_POINTLIST,
	D3D11_PRIMITIVE_TOPOLOGY_LINELIST,
	D3D11_PRIMITIVE_TOPOLOGY_LINESTRIP,
	D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST,
	D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP,
	D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED, // D3D11 doesn't do triangle fans.
	D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST,
};
// These are actually the same exact values/order/etc. as the GE ones, but for clarity...
/*
static const D3D11_LOGIC_OP logicOps[] = {
D3D11_LOGIC_OP_CLEAR,
D3D11_LOGIC_OP_AND,
D3D11_LOGIC_OP_AND_REVERSE,
D3D11_LOGIC_OP_COPY,
D3D11_LOGIC_OP_AND_INVERTED,
D3D11_LOGIC_OP_NO_OP,
D3D11_LOGIC_OP_XOR,
D3D11_LOGIC_OP_OR,
D3D11_LOGIC_OP_NOR,
D3D11_LOGIC_OP_EQUIVALENT,
D3D11_LOGIC_OP_INVERT,
D3D11_LOGIC_OP_OR_REVERSE,
D3D11_LOGIC_OP_COPY_INVERTED,
D3D11_LOGIC_OP_OR_INVERTED,
D3D11_LOGIC_OP_NAND,
D3D11_LOGIC_OP_SET,
};
*/
// Placeholder: shader-based blending is not implemented in this backend yet, so this
// always reports failure and the caller falls back to disabling it.
static bool ApplyShaderBlending() {
	return false;
}
// Placeholder counterpart to ApplyShaderBlending(); currently does nothing.
static void ResetShaderBlending() {
	//
}
class FramebufferManagerD3D11;
class ShaderManagerD3D11;
// TODO: Do this more progressively. No need to compute the entire state if the entire state hasn't changed.
// In Vulkan, we simply collect all the state together into a "pipeline key" - we don't actually set any state here
// (the caller is responsible for setting the little dynamic state that is supported, dynState).
// Compact description of the blend state, filled in by ConvertStateToKeys().
// Each field stores the raw D3D11 enum value named in its trailing comment, so the
// bit widths must be large enough for the largest enum value assigned.
struct D3D11BlendKey {
	// Blend
	unsigned int blendEnable : 1;
	unsigned int srcColor : 5; // D3D11_BLEND
	unsigned int destColor : 5; // D3D11_BLEND
	unsigned int srcAlpha : 5; // D3D11_BLEND
	unsigned int destAlpha : 5; // D3D11_BLEND
	unsigned int blendOpColor : 3; // D3D11_BLEND_OP
	unsigned int blendOpAlpha : 3; // D3D11_BLEND_OP
	unsigned int logicOpEnable : 1;
	unsigned int logicOp : 4; // D3D11_LOGIC_OP
	// Bits 1|2|4 enable color writes, bit 8 enables alpha writes (see the clear-mode path).
	unsigned int colorWriteMask : 4;
};
// Compact description of the depth/stencil state, filled in by ConvertStateToKeys().
// Each field stores the raw D3D11 enum value named in its trailing comment.
//
// D3D11_COMPARISON_FUNC and D3D11_STENCIL_OP both run from 1 to 8, so those fields need
// 4 bits: with only 3 bits D3D11_COMPARISON_ALWAYS (8) would be truncated to 0.
struct D3D11DepthStencilKey {
	// Depth/Stencil
	unsigned int depthTestEnable : 1;
	unsigned int depthWriteEnable : 1;
	unsigned int depthCompareOp : 4; // D3D11_COMPARISON
	unsigned int stencilTestEnable : 1;
	unsigned int stencilCompareOp : 4; // D3D11_COMPARISON
	unsigned int stencilPassOp : 4; // D3D11_STENCIL_OP
	unsigned int stencilFailOp : 4; // D3D11_STENCIL_OP
	unsigned int stencilDepthFailOp : 4; // D3D11_STENCIL_OP
};
// Compact description of the rasterizer state; currently only the cull mode.
struct D3D11RasterKey {
	unsigned int cullMode : 2; // D3D11_CULL_MODE
};
// In D3D11 we cache blend state objects etc, and we simply emit keys, which are then also used to create these objects.
// Bundle of the three state keys produced by ConvertStateToKeys(), which zeroes the
// whole struct with memset before filling it in.
struct D3D11StateKeys {
	D3D11BlendKey blend;
	D3D11DepthStencilKey depthStencil;
	D3D11RasterKey raster;
};
// State that is kept out of the cached state keys; ConvertStateToKeys() zeroes the
// whole struct with memset and then fills in the fields that apply.
struct D3D11DynamicState {
	int topology;
	// blendColor is only written when useBlendColor is true.
	bool useBlendColor;
	uint32_t blendColor;
	// The stencil fields are only written when useStencil is true.
	bool useStencil;
	uint8_t stencilRef;
	uint8_t stencilWriteMask;
	uint8_t stencilCompareMask;
	D3D11_VIEWPORT viewport;
	D3D11_RECT scissor;
};
void ConvertStateToKeys(FramebufferManagerCommon *fbManager, ShaderManagerD3D11 *shaderManager, int prim, D3D11StateKeys &key, D3D11DynamicState &dynState) {
memset(&key, 0, sizeof(key));
memset(&dynState, 0, sizeof(dynState));
// Unfortunately, this isn't implemented yet.
gstate_c.allowShaderBlend = false;
// Set blend - unless we need to do it in the shader.
GenericBlendState blendState;
ConvertBlendState(blendState, gstate_c.allowShaderBlend);
bool useBufferedRendering = g_Config.iRenderingMode != FB_NON_BUFFERED_MODE;
ViewportAndScissor vpAndScissor;
ConvertViewportAndScissor(useBufferedRendering,
fbManager->GetRenderWidth(), fbManager->GetRenderHeight(),
fbManager->GetTargetBufferWidth(), fbManager->GetTargetBufferHeight(),
vpAndScissor);
if (blendState.applyShaderBlending) {
if (ApplyShaderBlending()) {
// We may still want to do something about stencil -> alpha.
ApplyStencilReplaceAndLogicOp(blendState.replaceAlphaWithStencil, blendState);
} else {
// Until next time, force it off.
ResetShaderBlending();
gstate_c.allowShaderBlend = false;
}
} else if (blendState.resetShaderBlending) {
ResetShaderBlending();
}
if (blendState.enabled) {
key.blend.blendEnable = true;
key.blend.blendOpColor = d3d11BlendEqLookup[(size_t)blendState.eqColor];
key.blend.blendOpAlpha = d3d11BlendEqLookup[(size_t)blendState.eqAlpha];
key.blend.srcColor = d3d11BlendFactorLookup[(size_t)blendState.srcColor];
key.blend.srcAlpha = d3d11BlendFactorLookup[(size_t)blendState.srcAlpha];
key.blend.destColor = d3d11BlendFactorLookup[(size_t)blendState.dstColor];
key.blend.destAlpha = d3d11BlendFactorLookup[(size_t)blendState.dstAlpha];
if (blendState.dirtyShaderBlend) {
gstate_c.Dirty(DIRTY_SHADERBLEND);
}
dynState.useBlendColor = blendState.useBlendColor;
if (blendState.useBlendColor) {
dynState.blendColor = blendState.blendColor;
}
} else {
key.blend.blendEnable = false;
dynState.useBlendColor = false;
}
dynState.useStencil = false;
// Set ColorMask/Stencil/Depth
if (gstate.isModeClear()) {
key.blend.logicOpEnable = false;
key.raster.cullMode = D3D11_CULL_NONE;
key.depthStencil.depthTestEnable = true;
key.depthStencil.depthCompareOp = D3D11_COMPARISON_ALWAYS;
key.depthStencil.depthWriteEnable = gstate.isClearModeDepthMask();
if (gstate.isClearModeDepthMask()) {
fbManager->SetDepthUpdated();
}
// Color Test
bool colorMask = gstate.isClearModeColorMask();
bool alphaMask = gstate.isClearModeAlphaMask();
key.blend.colorWriteMask = (colorMask ? (1 | 2 | 4) : 0) | (alphaMask ? 8 : 0);
// Stencil Test
if (alphaMask) {
key.depthStencil.stencilTestEnable = true;
key.depthStencil.stencilCompareOp = D3D11_COMPARISON_ALWAYS;
key.depthStencil.stencilPassOp = D3D11_STENCIL_OP_REPLACE;
key.depthStencil.stencilFailOp = D3D11_STENCIL_OP_REPLACE;
key.depthStencil.stencilDepthFailOp = D3D11_STENCIL_OP_REPLACE;
dynState.useStencil = true;
// In clear mode, the stencil value is set to the alpha value of the vertex.
// A normal clear will be 2 points, the second point has the color.
// We override this value in the pipeline from software transform for clear rectangles.
dynState.stencilRef = 0xFF;
dynState.stencilWriteMask = 0xFF;
} else {
key.depthStencil.stencilTestEnable = false;
dynState.useStencil = false;
}
} else {
if (gstate_c.Supports(GPU_SUPPORTS_LOGIC_OP)) {
// Logic Ops
if (gstate.isLogicOpEnabled() && gstate.getLogicOp() != GE_LOGIC_COPY) {
key.blend.logicOpEnable = true;
// key.blendKey.logicOp = logicOps[gstate.getLogicOp()];
} else {
key.blend.logicOpEnable = false;
}
}
// Set cull
bool wantCull = !gstate.isModeThrough() && prim != GE_PRIM_RECTANGLES && gstate.isCullEnabled();
key.raster.cullMode = wantCull ? (gstate.getCullMode() ? D3D11_CULL_FRONT : D3D11_CULL_BACK) : D3D11_CULL_NONE;
// Depth Test
if (gstate.isDepthTestEnabled()) {
key.depthStencil.depthTestEnable = true;
key.depthStencil.depthCompareOp = compareOps[gstate.getDepthTestFunction()];
key.depthStencil.depthWriteEnable = gstate.isDepthWriteEnabled();
if (gstate.isDepthWriteEnabled()) {
fbManager->SetDepthUpdated();
}
} else {
key.depthStencil.depthTestEnable = false;
key.depthStencil.depthWriteEnable = false;
key.depthStencil.depthCompareOp = D3D11_COMPARISON_ALWAYS;
}
// PSP color/alpha mask is per bit but we can only support per byte.
// But let's do that, at least. And let's try a threshold.
bool rmask = (gstate.pmskc & 0xFF) < 128;
bool gmask = ((gstate.pmskc >> 8) & 0xFF) < 128;
bool bmask = ((gstate.pmskc >> 16) & 0xFF) < 128;
bool amask = (gstate.pmska & 0xFF) < 128;
#ifndef MOBILE_DEVICE
u8 abits = (gstate.pmska >> 0) & 0xFF;
u8 rbits = (gstate.pmskc >> 0) & 0xFF;
u8 gbits = (gstate.pmskc >> 8) & 0xFF;
u8 bbits = (gstate.pmskc >> 16) & 0xFF;
if ((rbits != 0 && rbits != 0xFF) || (gbits != 0 && gbits != 0xFF) || (bbits != 0 && bbits != 0xFF)) {
WARN_LOG_REPORT_ONCE(rgbmask, G3D, "Unsupported RGB mask: r=%02x g=%02x b=%02x", rbits, gbits, bbits);
}
if (abits != 0 && abits != 0xFF) {
// The stencil part of the mask is supported.
WARN_LOG_REPORT_ONCE(amask, G3D, "Unsupported alpha/stencil mask: %02x", abits);
}
#endif
// Let's not write to alpha if stencil isn't enabled.
if (!gstate.isStencilTestEnabled()) {
amask = false;
} else {
// If the stencil type is set to KEEP, we shouldn't write to the stencil/alpha channel.
if (ReplaceAlphaWithStencilType() == STENCIL_VALUE_KEEP) {
amask = false;
}
}
key.blend.colorWriteMask = (rmask ? 1 : 0) | (gmask ? 2 : 0) | (bmask ? 4 : 0) | (amask ? 8 : 0);
GenericStencilFuncState stencilState;
ConvertStencilFuncState(stencilState);
// Stencil Test
if (stencilState.enabled) {
key.depthStencil.stencilTestEnable = true;
key.depthStencil.stencilCompareOp = compareOps[stencilState.testFunc];
key.depthStencil.stencilPassOp = stencilOps[stencilState.zPass];
key.depthStencil.stencilFailOp = stencilOps[stencilState.sFail];
key.depthStencil.stencilDepthFailOp = stencilOps[stencilState.zFail];
dynState.useStencil = true;
dynState.stencilRef = stencilState.testRef;
dynState.stencilCompareMask = stencilState.testMask;
dynState.stencilWriteMask = stencilState.writeMask;
} else {
key.depthStencil.stencilTestEnable = false;
dynState.useStencil = false;
}
}
dynState.topology = primToD3D11[prim];
D3D11_VIEWPORT &vp = dynState.viewport;
vp.TopLeftX = vpAndScissor.viewportX;
vp.TopLeftY = vpAndScissor.viewportY;
vp.Width = vpAndScissor.viewportW;
vp.Height = vpAndScissor.viewportH;
vp.MinDepth = vpAndScissor.depthRangeMin;
vp.MaxDepth = vpAndScissor.depthRangeMax;
if (vpAndScissor.dirtyProj) {
gstate_c.Dirty(DIRTY_PROJMATRIX);
}
D3D11_RECT &scissor = dynState.scissor;
scissor.left = vpAndScissor.scissorX;
scissor.top = vpAndScissor.scissorY;
scissor.right = vpAndScissor.scissorX + vpAndScissor.scissorW;
scissor.bottom = vpAndScissor.scissorY + vpAndScissor.scissorH;
float depthMin = vpAndScissor.depthRangeMin;
float depthMax = vpAndScissor.depthRangeMax;
if (depthMin < 0.0f) depthMin = 0.0f;
if (depthMax > 1.0f) depthMax = 1.0f;
if (vpAndScissor.dirtyDepth) {
gstate_c.Dirty(DIRTY_DEPTHRANGE);
}
}

View File

@ -0,0 +1,5 @@
#include "GPU/D3D11/VertexShaderGeneratorD3D11.h"
// Stub: D3D11 vertex shader generation is not implemented yet.
// The body is empty, so neither buffer nor *usesLighting is written; callers must not
// read either of them expecting a result after this returns.
// NOTE(review): presumably buffer receives the generated shader source and *usesLighting
// reports whether that shader does lighting - confirm once this is filled in.
void GenerateVertexShaderD3D11(const ShaderID &id, char *buffer, bool *usesLighting) {
}

View File

@ -0,0 +1,5 @@
#pragma once
#include "GPU/Common/ShaderId.h"
void GenerateVertexShaderD3D11(const ShaderID &id, char *buffer, bool *usesLighting);

View File

@ -17,8 +17,6 @@
#pragma once
#include "Globals.h"
#include "GPU/Common/ShaderId.h"
namespace DX9 {

View File

@ -83,7 +83,7 @@
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<AdditionalIncludeDirectories>../common;..;../ext;../ext/native;../ext/glew;../ext/snappy;</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>..\dx9sdk\Include\DX11;../common;..;../ext;../ext/native;../ext/glew;../ext/snappy;</AdditionalIncludeDirectories>
<PreprocessorDefinitions>_CRTDBG_MAP_ALLOC;USING_WIN_UI;_CRT_SECURE_NO_WARNINGS;WIN32;_ARCH_32=1;_M_IX86=1;_DEBUG;_LIB;_UNICODE;UNICODE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<EnableEnhancedInstructionSet>StreamingSIMDExtensions2</EnableEnhancedInstructionSet>
<FloatingPointModel>Fast</FloatingPointModel>
@ -105,7 +105,7 @@
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<AdditionalIncludeDirectories>../common;..;../ext;../ext/native;../ext/glew;../ext/snappy;</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>..\dx9sdk\Include\DX11;../common;..;../ext;../ext/native;../ext/glew;../ext/snappy;</AdditionalIncludeDirectories>
<EnableEnhancedInstructionSet>NotSet</EnableEnhancedInstructionSet>
<FloatingPointModel>Fast</FloatingPointModel>
<OmitFramePointers>false</OmitFramePointers>
@ -131,7 +131,7 @@
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<AdditionalIncludeDirectories>../common;..;../ext;../ext/native;../ext/glew;../ext/snappy;</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>..\dx9sdk\Include\DX11;../common;..;../ext;../ext/native;../ext/glew;../ext/snappy;</AdditionalIncludeDirectories>
<BufferSecurityCheck>false</BufferSecurityCheck>
<EnableEnhancedInstructionSet>StreamingSIMDExtensions2</EnableEnhancedInstructionSet>
<FloatingPointModel>Fast</FloatingPointModel>
@ -157,7 +157,7 @@
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<AdditionalIncludeDirectories>../common;..;../ext;../ext/native;../ext/glew;../ext/snappy;</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>..\dx9sdk\Include\DX11;../common;..;../ext;../ext/native;../ext/glew;../ext/snappy;</AdditionalIncludeDirectories>
<BufferSecurityCheck>false</BufferSecurityCheck>
<EnableEnhancedInstructionSet>NotSet</EnableEnhancedInstructionSet>
<FloatingPointModel>Fast</FloatingPointModel>
@ -190,6 +190,7 @@
<ClInclude Include="Common\PostShader.h" />
<ClInclude Include="Common\ShaderCommon.h" />
<ClInclude Include="Common\ShaderId.h" />
<ClInclude Include="Common\ShaderUniforms.h" />
<ClInclude Include="Common\SoftwareTransformCommon.h" />
<ClInclude Include="Common\SplineCommon.h" />
<ClInclude Include="Common\TextureDecoderNEON.h">
@ -202,6 +203,9 @@
<ClInclude Include="Common\TextureScalerCommon.h" />
<ClInclude Include="Common\TransformCommon.h" />
<ClInclude Include="Common\VertexDecoderCommon.h" />
<ClInclude Include="D3D11\FragmentShaderGeneratorD3D11.h" />
<ClInclude Include="D3D11\ShaderManagerD3D11.h" />
<ClInclude Include="D3D11\VertexShaderGeneratorD3D11.h" />
<ClInclude Include="Debugger\Breakpoints.h" />
<ClInclude Include="Debugger\Stepping.h" />
<ClInclude Include="Directx9\DepalettizeShaderDX9.h" />
@ -261,6 +265,7 @@
<ClCompile Include="Common\IndexGenerator.cpp" />
<ClCompile Include="Common\PostShader.cpp" />
<ClCompile Include="Common\ShaderId.cpp" />
<ClCompile Include="Common\ShaderUniforms.cpp" />
<ClCompile Include="Common\SplineCommon.cpp" />
<ClCompile Include="Common\TextureDecoderNEON.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
@ -286,6 +291,10 @@
</ClCompile>
<ClCompile Include="Common\VertexDecoderCommon.cpp" />
<ClCompile Include="Common\VertexDecoderX86.cpp" />
<ClCompile Include="D3D11\FragmentShaderGeneratorD3D11.cpp" />
<ClCompile Include="D3D11\ShaderManagerD3D11.cpp" />
<ClCompile Include="D3D11\StateMappingD3D11.cpp" />
<ClCompile Include="D3D11\VertexShaderGeneratorD3D11.cpp" />
<ClCompile Include="Debugger\Breakpoints.cpp" />
<ClCompile Include="Debugger\Stepping.cpp" />
<ClCompile Include="Directx9\DepalettizeShaderDX9.cpp" />

View File

@ -25,6 +25,9 @@
<Filter Include="Vulkan">
<UniqueIdentifier>{3c621896-140c-4c8b-8e4d-a478bfdeca8a}</UniqueIdentifier>
</Filter>
<Filter Include="D3D11">
<UniqueIdentifier>{88eb5cea-ec25-4881-89da-02f9f2fa8f3f}</UniqueIdentifier>
</Filter>
</ItemGroup>
<ItemGroup>
<ClInclude Include="ge_constants.h">
@ -222,6 +225,18 @@
<ClInclude Include="GLES\FragmentTestCacheGLES.h">
<Filter>GLES</Filter>
</ClInclude>
<ClInclude Include="Common\ShaderUniforms.h">
<Filter>Common</Filter>
</ClInclude>
<ClInclude Include="D3D11\ShaderManagerD3D11.h">
<Filter>D3D11</Filter>
</ClInclude>
<ClInclude Include="D3D11\VertexShaderGeneratorD3D11.h">
<Filter>D3D11</Filter>
</ClInclude>
<ClInclude Include="D3D11\FragmentShaderGeneratorD3D11.h">
<Filter>D3D11</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="Math3D.cpp">
@ -428,5 +443,20 @@
<ClCompile Include="GLES\FragmentTestCacheGLES.cpp">
<Filter>GLES</Filter>
</ClCompile>
<ClCompile Include="D3D11\StateMappingD3D11.cpp">
<Filter>D3D11</Filter>
</ClCompile>
<ClCompile Include="Common\ShaderUniforms.cpp">
<Filter>Common</Filter>
</ClCompile>
<ClCompile Include="D3D11\ShaderManagerD3D11.cpp">
<Filter>D3D11</Filter>
</ClCompile>
<ClCompile Include="D3D11\VertexShaderGeneratorD3D11.cpp">
<Filter>D3D11</Filter>
</ClCompile>
<ClCompile Include="D3D11\FragmentShaderGeneratorD3D11.cpp">
<Filter>D3D11</Filter>
</ClCompile>
</ItemGroup>
</Project>

View File

@ -150,12 +150,6 @@ std::string VulkanVertexShader::GetShaderString(DebugShaderStringType type) cons
}
}
// Applies the viewport-derived translate/scale from gstate_c to a projection matrix, in place.
// X and Y are only scaled (by vpWidthScale / vpHeightScale); Z is scaled by half of vpDepthScale
// and shifted by half of vpZOffset plus 0.5.
// invertedX / invertedY are accepted but not consulted by this function.
static void ConvertProjMatrixToVulkan(Matrix4x4 &in, bool invertedX, bool invertedY) {
	const float zShift = gstate_c.vpZOffset * 0.5f + 0.5f;
	const float zStretch = gstate_c.vpDepthScale * 0.5f;
	in.translateAndScale(
		Vec3(0, 0, zShift),
		Vec3(gstate_c.vpWidthScale, gstate_c.vpHeightScale, zStretch));
}
ShaderManagerVulkan::ShaderManagerVulkan(VulkanContext *vulkan)
: vulkan_(vulkan), lastVShader_(nullptr), lastFShader_(nullptr) {
codeBuffer_ = new char[16384];
@ -187,207 +181,6 @@ uint32_t ShaderManagerVulkan::PushBoneBuffer(VulkanPushBuffer *dest, VkBuffer *b
return dest->PushAligned(&ub_bones, sizeof(ub_bones), uboAlignment_, buf);
}
// Refreshes the members of ub_base selected by the DIRTY_* bits set in dirtyUniforms,
// reading the current values from gstate / gstate_c. Members whose bit is clear are left untouched.
void ShaderManagerVulkan::BaseUpdateUniforms(uint64_t dirtyUniforms) {
if (dirtyUniforms & DIRTY_TEXENV) {
Uint8x3ToFloat4(ub_base.texEnvColor, gstate.texenvcolor);
}
if (dirtyUniforms & DIRTY_ALPHACOLORREF) {
// The alpha reference is ANDed with the alpha test mask before being stored.
Uint8x3ToInt4_Alpha(ub_base.alphaColorRef, gstate.getColorTestRef(), gstate.getAlphaTestRef() & gstate.getAlphaTestMask());
}
if (dirtyUniforms & DIRTY_ALPHACOLORMASK) {
Uint8x3ToInt4_Alpha(ub_base.colorTestMask, gstate.getColorTestMask(), gstate.getAlphaTestMask());
}
if (dirtyUniforms & DIRTY_FOGCOLOR) {
Uint8x3ToFloat4(ub_base.fogColor, gstate.fogcolor);
}
if (dirtyUniforms & DIRTY_SHADERBLEND) {
// Both fixed blend colors are refreshed together under the same dirty bit.
Uint8x3ToFloat4(ub_base.blendFixA, gstate.getFixA());
Uint8x3ToFloat4(ub_base.blendFixB, gstate.getFixB());
}
if (dirtyUniforms & DIRTY_TEXCLAMP) {
// Ratio of the texture size reported by gstate to the current texture size in gstate_c.
const float invW = 1.0f / (float)gstate_c.curTextureWidth;
const float invH = 1.0f / (float)gstate_c.curTextureHeight;
const int w = gstate.getTextureWidth(0);
const int h = gstate.getTextureHeight(0);
const float widthFactor = (float)w * invW;
const float heightFactor = (float)h * invH;
// First wrap xy, then half texel xy (for clamp.)
ub_base.texClamp[0] = widthFactor;
ub_base.texClamp[1] = heightFactor;
ub_base.texClamp[2] = invW * 0.5f;
ub_base.texClamp[3] = invH * 0.5f;
// Only the first two components of texClampOffset are written here.
ub_base.texClampOffset[0] = gstate_c.curTextureXOffset * invW;
ub_base.texClampOffset[1] = gstate_c.curTextureYOffset * invH;
}
if (dirtyUniforms & DIRTY_PROJMATRIX) {
// Work on a copy so gstate.projMatrix itself is never modified.
Matrix4x4 flippedMatrix;
memcpy(&flippedMatrix, gstate.projMatrix, 16 * sizeof(float));
// A negative viewport height negates elements 1, 5, 9 and 13.
const bool invertedY = gstate_c.vpHeight < 0;
if (invertedY) {
flippedMatrix[1] = -flippedMatrix[1];
flippedMatrix[5] = -flippedMatrix[5];
flippedMatrix[9] = -flippedMatrix[9];
flippedMatrix[13] = -flippedMatrix[13];
}
// A negative viewport width negates elements 0, 4, 8 and 12.
const bool invertedX = gstate_c.vpWidth < 0;
if (invertedX) {
flippedMatrix[0] = -flippedMatrix[0];
flippedMatrix[4] = -flippedMatrix[4];
flippedMatrix[8] = -flippedMatrix[8];
flippedMatrix[12] = -flippedMatrix[12];
}
ConvertProjMatrixToVulkan(flippedMatrix, invertedX, invertedY);
CopyMatrix4x4(ub_base.proj, flippedMatrix.getReadPtr());
}
if (dirtyUniforms & DIRTY_PROJTHROUGHMATRIX) {
// Orthographic matrix spanning the current render target size, with a 0..1 depth range.
Matrix4x4 proj_through;
proj_through.setOrthoVulkan(0.0f, gstate_c.curRTWidth, 0, gstate_c.curRTHeight, 0, 1);
CopyMatrix4x4(ub_base.proj_through, proj_through.getReadPtr());
}
// Transform
if (dirtyUniforms & DIRTY_WORLDMATRIX) {
ConvertMatrix4x3To4x4(ub_base.world, gstate.worldMatrix);
}
if (dirtyUniforms & DIRTY_VIEWMATRIX) {
ConvertMatrix4x3To4x4(ub_base.view, gstate.viewMatrix);
}
if (dirtyUniforms & DIRTY_TEXMATRIX) {
ConvertMatrix4x3To4x4(ub_base.tex, gstate.tgenMatrix);
}
// Combined two small uniforms
if (dirtyUniforms & (DIRTY_FOGCOEF | DIRTY_STENCILREPLACEVALUE)) {
// Packed as { fog1, fog2, stencil test reference }; either dirty bit refreshes all three.
float fogcoef_stencil[3] = {
getFloat24(gstate.fog1),
getFloat24(gstate.fog2),
(float)gstate.getStencilTestRef()
};
if (my_isinf(fogcoef_stencil[1])) {
// not really sure what a sensible value might be.
fogcoef_stencil[1] = fogcoef_stencil[1] < 0.0f ? -10000.0f : 10000.0f;
} else if (my_isnan(fogcoef_stencil[1])) {
// Workaround for https://github.com/hrydgard/ppsspp/issues/5384#issuecomment-38365988
// Just put the fog far away at a large finite distance.
// Infinities and NaNs are rather unpredictable in shaders on many GPUs
// so it's best to just make it a sane calculation.
fogcoef_stencil[0] = 100000.0f;
fogcoef_stencil[1] = 1.0f;
}
#ifndef MOBILE_DEVICE
// Continues the else-if chain above: inf/NaN in [1] was already handled, so this
// can only fire for a NaN/inf in fogcoef_stencil[0]. It only reports; nothing is patched.
else if (my_isnanorinf(fogcoef_stencil[1]) || my_isnanorinf(fogcoef_stencil[0])) {
ERROR_LOG_REPORT_ONCE(fognan, G3D, "Unhandled fog NaN/INF combo: %f %f", fogcoef_stencil[0], fogcoef_stencil[1]);
}
#endif
CopyFloat3(ub_base.fogCoef_stencil, fogcoef_stencil);
}
// Note - this one is not in lighting but in transformCommon as it has uses beyond lighting
if (dirtyUniforms & DIRTY_MATAMBIENTALPHA) {
Uint8x3ToFloat4_AlphaUint8(ub_base.matAmbient, gstate.materialambient, gstate.getMaterialAmbientA());
}
// Texturing
if (dirtyUniforms & DIRTY_UVSCALEOFFSET) {
const float invW = 1.0f / (float)gstate_c.curTextureWidth;
const float invH = 1.0f / (float)gstate_c.curTextureHeight;
const int w = gstate.getTextureWidth(0);
const int h = gstate.getTextureHeight(0);
const float widthFactor = (float)w * invW;
const float heightFactor = (float)h * invH;
// Scale goes in components 0/1; the offset components 2/3 are always zero here.
ub_base.uvScaleOffset[0] = widthFactor;
ub_base.uvScaleOffset[1] = heightFactor;
ub_base.uvScaleOffset[2] = 0.0f;
ub_base.uvScaleOffset[3] = 0.0f;
}
if (dirtyUniforms & DIRTY_DEPTHRANGE) {
float viewZScale = gstate.getViewportZScale();
float viewZCenter = gstate.getViewportZCenter();
float viewZInvScale;
// We had to scale and translate Z to account for our clamped Z range.
// Therefore, we also need to reverse this to round properly.
//
// Example: scale = 65535.0, center = 0.0
// Resulting range = -65535 to 65535, clamped to [0, 65535]
// gstate_c.vpDepthScale = 2.0f
// gstate_c.vpZOffset = -1.0f
//
// The projection already accounts for those, so we need to reverse them.
//
// Additionally, D3D9 uses a range from [0, 1]. We double and move the center.
// NOTE(review): the line above names D3D9 although this is the Vulkan shader manager -
// presumably carried over from another backend; confirm the wording.
viewZScale *= (1.0f / gstate_c.vpDepthScale) * 2.0f;
viewZCenter -= 65535.0f * gstate_c.vpZOffset + 32768.5f;
// Guard the reciprocal against a zero scale.
if (viewZScale != 0.0) {
viewZInvScale = 1.0f / viewZScale;
} else {
viewZInvScale = 0.0;
}
// Components 1 and 2 both receive viewZCenter.
ub_base.depthRange[0] = viewZScale;
ub_base.depthRange[1] = viewZCenter;
ub_base.depthRange[2] = viewZCenter;
ub_base.depthRange[3] = viewZInvScale;
}
}
// Refreshes the members of ub_lights selected by the DIRTY_* bits set in dirtyUniforms,
// reading the current values from gstate. Members whose bit is clear are left untouched.
void ShaderManagerVulkan::LightUpdateUniforms(uint64_t dirtyUniforms) {
	// Global ambient and material colors.
	if (dirtyUniforms & DIRTY_AMBIENT) {
		Uint8x3ToFloat4_AlphaUint8(ub_lights.ambientColor, gstate.ambientcolor, gstate.getAmbientA());
	}
	if (dirtyUniforms & DIRTY_MATDIFFUSE) {
		Uint8x3ToFloat4(ub_lights.materialDiffuse, gstate.materialdiffuse);
	}
	if (dirtyUniforms & DIRTY_MATEMISSIVE) {
		Uint8x3ToFloat4(ub_lights.materialEmissive, gstate.materialemissive);
	}
	if (dirtyUniforms & DIRTY_MATSPECULAR) {
		Uint8x3ToFloat4_Alpha(ub_lights.materialSpecular, gstate.materialspecular, getFloat24(gstate.materialspecularcoef));
	}

	// Per-light state. Each light has its own dirty bit (DIRTY_LIGHT0 shifted by the light index)
	// and owns three consecutive entries in the lpos/ldir/latt/lcolor arrays.
	for (int light = 0; light < 4; ++light) {
		if (!(dirtyUniforms & (DIRTY_LIGHT0 << light))) {
			continue;
		}
		const int first = light * 3;

		if (!gstate.isDirectionalLight(light)) {
			ExpandFloat24x3ToFloat4(ub_lights.lpos[light], &gstate.lpos[first]);
		} else {
			// Directional lights are stored prenormalized; a zero-length vector is passed through as-is.
			const float px = getFloat24(gstate.lpos[first + 0]);
			const float py = getFloat24(gstate.lpos[first + 1]);
			const float pz = getFloat24(gstate.lpos[first + 2]);
			const float magnitude = sqrtf(px*px + py*py + pz*pz);
			const float invMagnitude = (magnitude == 0.0f) ? 1.0f : 1.0f / magnitude;
			float unitDir[3] = { px * invMagnitude, py * invMagnitude, pz * invMagnitude };
			CopyFloat3To4(ub_lights.lpos[light], unitDir);
		}

		ExpandFloat24x3ToFloat4(ub_lights.ldir[light], &gstate.ldir[first]);
		ExpandFloat24x3ToFloat4(ub_lights.latt[light], &gstate.latt[first]);
		CopyFloat1To4(ub_lights.lightAngle[light], getFloat24(gstate.lcutoff[light]));
		CopyFloat1To4(ub_lights.lightSpotCoef[light], getFloat24(gstate.lconv[light]));

		// lcolor holds ambient, diffuse, specular in that order for each light.
		Uint8x3ToFloat4(ub_lights.lightAmbient[light], gstate.lcolor[first]);
		Uint8x3ToFloat4(ub_lights.lightDiffuse[light], gstate.lcolor[first + 1]);
		Uint8x3ToFloat4(ub_lights.lightSpecular[light], gstate.lcolor[first + 2]);
	}
}
// Re-expands every bone matrix whose dirty bit (DIRTY_BONEMATRIX0 shifted by the bone index)
// is set in dirtyUniforms, from the 12-float entries in gstate.boneMatrix into ub_bones.
void ShaderManagerVulkan::BoneUpdateUniforms(uint64_t dirtyUniforms) {
	for (int bone = 0; bone < 8; ++bone) {
		if (!(dirtyUniforms & (DIRTY_BONEMATRIX0 << bone))) {
			continue;
		}
		// Each source matrix occupies 12 consecutive floats.
		ConvertMatrix4x3To4x4(ub_bones.bones[bone], gstate.boneMatrix + 12 * bone);
	}
}
void ShaderManagerVulkan::DeviceRestore(VulkanContext *vulkan) {
vulkan_ = vulkan;
uboAlignment_ = vulkan_->GetPhysicalDeviceProperties().limits.minUniformBufferOffsetAlignment;
@ -429,11 +222,11 @@ uint64_t ShaderManagerVulkan::UpdateUniforms() {
uint64_t dirty = gstate_c.GetDirtyUniforms();
if (dirty != 0) {
if (dirty & DIRTY_BASE_UNIFORMS)
BaseUpdateUniforms(dirty);
BaseUpdateUniforms(&ub_base, dirty);
if (dirty & DIRTY_LIGHT_UNIFORMS)
LightUpdateUniforms(dirty);
LightUpdateUniforms(&ub_lights, dirty);
if (dirty & DIRTY_BONE_UNIFORMS)
BoneUpdateUniforms(dirty);
BoneUpdateUniforms(&ub_bones, dirty);
}
gstate_c.CleanUniforms();
return dirty;

View File

@ -27,104 +27,7 @@
#include "GPU/Vulkan/FragmentShaderGeneratorVulkan.h"
#include "GPU/Vulkan/VulkanUtil.h"
#include "math/lin/matrix4x4.h"
void ConvertProjMatrixToVulkan(Matrix4x4 & in);
// Pretty much full. Will need more bits for more fine grained dirty tracking for lights.
// Aggregate dirty masks, one per uniform buffer. ShaderManagerVulkan::UpdateUniforms tests the
// dirty flags against these to decide whether the base and/or light update routine must run.
enum : uint64_t {
// Every DIRTY_* bit whose value is stored in the base uniform buffer.
DIRTY_BASE_UNIFORMS =
DIRTY_WORLDMATRIX | DIRTY_PROJTHROUGHMATRIX | DIRTY_VIEWMATRIX | DIRTY_TEXMATRIX | DIRTY_ALPHACOLORREF |
DIRTY_PROJMATRIX | DIRTY_FOGCOLOR | DIRTY_FOGCOEF | DIRTY_TEXENV | DIRTY_STENCILREPLACEVALUE |
DIRTY_ALPHACOLORMASK | DIRTY_SHADERBLEND | DIRTY_UVSCALEOFFSET | DIRTY_TEXCLAMP | DIRTY_DEPTHRANGE | DIRTY_MATAMBIENTALPHA,
// Every DIRTY_* bit whose value is stored in the lights uniform buffer.
DIRTY_LIGHT_UNIFORMS =
DIRTY_LIGHT0 | DIRTY_LIGHT1 | DIRTY_LIGHT2 | DIRTY_LIGHT3 |
DIRTY_MATDIFFUSE | DIRTY_MATSPECULAR | DIRTY_MATEMISSIVE | DIRTY_AMBIENT,
};
// CPU-side image of the base uniform block (matrices, fog, texturing, color test and blend data).
// Member order mirrors the declarations in ub_baseStr; every member is padded to a multiple of
// four 32-bit components even where the shader side only declares a vec2/vec3.
// TODO: Split into two structs, one for software transform and one for hardware transform, to save space.
// 512 bytes. Probably can't get to 256 (nVidia's UBO alignment).
struct UB_VS_FS_Base {
	float proj[16];
	float proj_through[16];
	float view[16];
	float world[16];
	float tex[16];  // not that common, may want to break out
	float uvScaleOffset[4];
	float depthRange[4];
	float fogCoef_stencil[4];
	float matAmbient[4];
	// Fragment data
	float fogColor[4];
	float texEnvColor[4];
	int alphaColorRef[4];
	int colorTestMask[4];
	float blendFixA[4];
	float blendFixB[4];
	float texClamp[4];
	float texClampOffset[4];
};
// Lock in the documented size so an accidental member change fails the build instead of
// silently desynchronizing the buffer from the shader-side block.
static_assert(sizeof(UB_VS_FS_Base) == 512, "UB_VS_FS_Base must stay exactly 512 bytes");
// Shader-language member declarations matching UB_VS_FS_Base, one per struct member and in the
// same order. Keep the two in sync when adding, removing or reordering members.
// NOTE(review): presumably spliced into a uniform block in generated shader source - confirm at the use site.
static const char *ub_baseStr =
R"( mat4 proj_mtx;
mat4 proj_through_mtx;
mat4 view_mtx;
mat4 world_mtx;
mat4 tex_mtx;
vec4 uvscaleoffset;
vec4 depthRange;
vec3 fogcoef_stencilreplace;
vec4 matambientalpha;
vec3 fogcolor;
vec3 texenv;
ivec4 alphacolorref;
ivec4 alphacolormask;
vec3 blendFixA;
vec3 blendFixB;
vec4 texclamp;
vec2 texclampoff;
)";
// CPU-side image of the lighting uniform block: global ambient, material colors, and the
// per-light data for four lights. Member order mirrors the declarations in ub_vs_lightsStr;
// each per-light entry is padded to four floats.
// 576 bytes. Can we get down to 512?
struct UB_VS_Lights {
	float ambientColor[4];
	float materialDiffuse[4];
	float materialSpecular[4];
	float materialEmissive[4];
	float lpos[4][4];
	float ldir[4][4];
	float latt[4][4];
	float lightAngle[4][4];  // TODO: Merge with lightSpotCoef, use .xy
	float lightSpotCoef[4][4];
	float lightAmbient[4][4];
	float lightDiffuse[4][4];
	float lightSpecular[4][4];
};
// Lock in the documented size so an accidental member change fails the build instead of
// silently desynchronizing the buffer from the shader-side block.
static_assert(sizeof(UB_VS_Lights) == 576, "UB_VS_Lights must stay exactly 576 bytes");
// Shader-language member declarations matching UB_VS_Lights, one per struct member and in the
// same order. Keep the two in sync when adding, removing or reordering members.
// NOTE(review): presumably spliced into a uniform block in generated shader source - confirm at the use site.
static const char *ub_vs_lightsStr =
R"( vec4 globalAmbient;
vec3 matdiffuse;
vec4 matspecular;
vec3 matemissive;
vec3 pos[4];
vec3 dir[4];
vec3 att[4];
float angle[4];
float spotCoef[4];
vec3 ambient[4];
vec3 diffuse[4];
vec3 specular[4];
)";
// CPU-side image of the bone uniform block: eight 4x4 float matrices, matching the
// "mat4 m[8]" declaration in ub_vs_bonesStr.
// With some cleverness, we could get away with uploading just half this when only the four first
// bones are being used. This is 512b, 256b would be great.
// Could also move to 4x3 matrices - would let us fit 5 bones into 256b.
struct UB_VS_Bones {
	float bones[8][16];
};
// Lock in the documented size so an accidental member change fails the build instead of
// silently desynchronizing the buffer from the shader-side block.
static_assert(sizeof(UB_VS_Bones) == 512, "UB_VS_Bones must stay exactly 512 bytes");
// Shader-language member declaration matching UB_VS_Bones (eight 4x4 matrices).
// NOTE(review): presumably spliced into a uniform block in generated shader source - confirm at the use site.
static const char *ub_vs_bonesStr =
R"( mat4 m[8];
)";
#include "GPU/Common/ShaderUniforms.h"
class VulkanContext;
class VulkanPushBuffer;
@ -215,10 +118,6 @@ public:
uint32_t PushBoneBuffer(VulkanPushBuffer *dest, VkBuffer *buf);
private:
void BaseUpdateUniforms(uint64_t dirtyUniforms);
void LightUpdateUniforms(uint64_t dirtyUniforms);
void BoneUpdateUniforms(uint64_t dirtyUniforms);
void Clear();
VulkanContext *vulkan_;