Start stubbing out a new D3D11 backend

2024-11-23 13:30:02 +00:00 · 2017-02-08 17:35:41 +01:00 · 2017-02-08 17:35:41 +01:00 · 175b97ef34
commit 175b97ef34
parent 9dd3e18ed4
15 changed files with 1234 additions and 324 deletions
--- a/GPU/Common/ShaderId.h
+++ b/GPU/Common/ShaderId.h
@ -1,5 +1,7 @@
 #pragma once

+#include <string>
+#include <cstdint>
 #include "base/basictypes.h"

 // TODO: There will be additional bits, indicating that groups of these will be
@ -93,9 +95,9 @@ struct ShaderID {
 		}
 	}

-	u32 d[2];
+	uint32_t d[2];
 	bool operator < (const ShaderID &other) const {
-		for (size_t i = 0; i < sizeof(d) / sizeof(u32); i++) {
+		for (size_t i = 0; i < sizeof(d) / sizeof(uint32_t); i++) {
 			if (d[i] < other.d[i])
 				return true;
 			if (d[i] > other.d[i])
@ -104,7 +106,7 @@ struct ShaderID {
 		return false;
 	}
 	bool operator == (const ShaderID &other) const {
-		for (size_t i = 0; i < sizeof(d) / sizeof(u32); i++) {
+		for (size_t i = 0; i < sizeof(d) / sizeof(uint32_t); i++) {
 			if (d[i] != other.d[i])
 				return false;
 		}
@ -142,7 +144,7 @@ struct ShaderID {


 bool CanUseHardwareTransform(int prim);
-void ComputeVertexShaderID(ShaderID *id, u32 vertexType, bool useHWTransform);
+void ComputeVertexShaderID(ShaderID *id, uint32_t vertexType, bool useHWTransform);
 // Generates a compact string that describes the shader. Useful in a list to get an overview
 // of the current flora of shaders.
 std::string VertexShaderDesc(const ShaderID &id);
--- a/GPU/Common/ShaderUniforms.cpp
+++ b/GPU/Common/ShaderUniforms.cpp
@ -0,0 +1,215 @@
+#include "ShaderUniforms.h"
+#include "math/dataconv.h"
+#include "math/lin/matrix4x4.h"
+#include "math/math_util.h"
+#include "math/lin/vec3.h"
+#include "GPU/GPUState.h"
+#include "GPU/Math3D.h"
+#include "Core/Reporting.h"
+
+static void ConvertProjMatrixToVulkan(Matrix4x4 &in, bool invertedX, bool invertedY) {  // Applies a Z translate plus XYZ scale taken from gstate_c to `in`, in place. invertedX/invertedY are accepted but not used here.
+	const Vec3 trans(0, 0, gstate_c.vpZOffset * 0.5f + 0.5f);  // Only Z is translated.
+	const Vec3 scale(gstate_c.vpWidthScale, gstate_c.vpHeightScale, gstate_c.vpDepthScale * 0.5f);
+	in.translateAndScale(trans, scale);
+}
+
+void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms) {  // Rewrites only those fields of *ub whose DIRTY_* bit is set in dirtyUniforms, reading from gstate / gstate_c.
+	if (dirtyUniforms & DIRTY_TEXENV) {
+		Uint8x3ToFloat4(ub->texEnvColor, gstate.texenvcolor);
+	}
+	if (dirtyUniforms & DIRTY_ALPHACOLORREF) {
+		Uint8x3ToInt4_Alpha(ub->alphaColorRef, gstate.getColorTestRef(), gstate.getAlphaTestRef() & gstate.getAlphaTestMask());  // The alpha reference is stored already masked.
+	}
+	if (dirtyUniforms & DIRTY_ALPHACOLORMASK) {
+		Uint8x3ToInt4_Alpha(ub->colorTestMask, gstate.getColorTestMask(), gstate.getAlphaTestMask());
+	}
+	if (dirtyUniforms & DIRTY_FOGCOLOR) {
+		Uint8x3ToFloat4(ub->fogColor, gstate.fogcolor);
+	}
+	if (dirtyUniforms & DIRTY_SHADERBLEND) {
+		Uint8x3ToFloat4(ub->blendFixA, gstate.getFixA());
+		Uint8x3ToFloat4(ub->blendFixB, gstate.getFixB());
+	}
+	if (dirtyUniforms & DIRTY_TEXCLAMP) {
+		const float invW = 1.0f / (float)gstate_c.curTextureWidth;  // NOTE(review): a zero current texture size would produce inf here - confirm callers rule it out.
+		const float invH = 1.0f / (float)gstate_c.curTextureHeight;
+		const int w = gstate.getTextureWidth(0);
+		const int h = gstate.getTextureHeight(0);
+		const float widthFactor = (float)w * invW;
+		const float heightFactor = (float)h * invH;
+
+		// First wrap xy, then half texel xy (for clamp.)
+		ub->texClamp[0] = widthFactor;
+		ub->texClamp[1] = heightFactor;
+		ub->texClamp[2] = invW * 0.5f;
+		ub->texClamp[3] = invH * 0.5f;
+		ub->texClampOffset[0] = gstate_c.curTextureXOffset * invW;  // Only elements [0] and [1] of texClampOffset are written.
+		ub->texClampOffset[1] = gstate_c.curTextureYOffset * invH;
+	}
+
+	if (dirtyUniforms & DIRTY_PROJMATRIX) {
+		Matrix4x4 flippedMatrix;
+		memcpy(&flippedMatrix, gstate.projMatrix, 16 * sizeof(float));  // Work on a copy; gstate.projMatrix itself is not modified.
+
+		const bool invertedY = gstate_c.vpHeight < 0;
+		if (invertedY) {
+			flippedMatrix[1] = -flippedMatrix[1];
+			flippedMatrix[5] = -flippedMatrix[5];
+			flippedMatrix[9] = -flippedMatrix[9];
+			flippedMatrix[13] = -flippedMatrix[13];
+		}
+		const bool invertedX = gstate_c.vpWidth < 0;
+		if (invertedX) {
+			flippedMatrix[0] = -flippedMatrix[0];
+			flippedMatrix[4] = -flippedMatrix[4];
+			flippedMatrix[8] = -flippedMatrix[8];
+			flippedMatrix[12] = -flippedMatrix[12];
+		}
+		ConvertProjMatrixToVulkan(flippedMatrix, invertedX, invertedY);
+		CopyMatrix4x4(ub->proj, flippedMatrix.getReadPtr());
+	}
+
+	if (dirtyUniforms & DIRTY_PROJTHROUGHMATRIX) {
+		Matrix4x4 proj_through;
+		proj_through.setOrthoVulkan(0.0f, gstate_c.curRTWidth, 0, gstate_c.curRTHeight, 0, 1);  // Built from the current render target size.
+		CopyMatrix4x4(ub->proj_through, proj_through.getReadPtr());
+	}
+
+	// Transform
+	if (dirtyUniforms & DIRTY_WORLDMATRIX) {
+		ConvertMatrix4x3To4x4(ub->world, gstate.worldMatrix);
+	}
+	if (dirtyUniforms & DIRTY_VIEWMATRIX) {
+		ConvertMatrix4x3To4x4(ub->view, gstate.viewMatrix);
+	}
+	if (dirtyUniforms & DIRTY_TEXMATRIX) {
+		ConvertMatrix4x3To4x4(ub->tex, gstate.tgenMatrix);
+	}
+
+	// Combined two small uniforms
+	if (dirtyUniforms & (DIRTY_FOGCOEF | DIRTY_STENCILREPLACEVALUE)) {  // Either bit rewrites all three packed values.
+		float fogcoef_stencil[3] = {
+			getFloat24(gstate.fog1),
+			getFloat24(gstate.fog2),
+			(float)gstate.getStencilTestRef()
+		};
+		if (my_isinf(fogcoef_stencil[1])) {
+			// not really sure what a sensible value might be.
+			fogcoef_stencil[1] = fogcoef_stencil[1] < 0.0f ? -10000.0f : 10000.0f;
+		} else if (my_isnan(fogcoef_stencil[1])) {
+			// Workaround for https://github.com/hrydgard/ppsspp/issues/5384#issuecomment-38365988
+			// Just put the fog far away at a large finite distance.
+			// Infinities and NaNs are rather unpredictable in shaders on many GPUs
+			// so it's best to just make it a sane calculation.
+			fogcoef_stencil[0] = 100000.0f;
+			fogcoef_stencil[1] = 1.0f;
+		}
+#ifndef MOBILE_DEVICE
+		else if (my_isnanorinf(fogcoef_stencil[1]) || my_isnanorinf(fogcoef_stencil[0])) {  // Reached only when [1] is finite, so in practice this reports a NaN/INF in [0].
+			ERROR_LOG_REPORT_ONCE(fognan, G3D, "Unhandled fog NaN/INF combo: %f %f", fogcoef_stencil[0], fogcoef_stencil[1]);
+		}
+#endif
+		CopyFloat3(ub->fogCoef_stencil, fogcoef_stencil);
+	}
+
+	// Note - this one is not in lighting but in transformCommon as it has uses beyond lighting
+	if (dirtyUniforms & DIRTY_MATAMBIENTALPHA) {
+		Uint8x3ToFloat4_AlphaUint8(ub->matAmbient, gstate.materialambient, gstate.getMaterialAmbientA());
+	}
+
+	// Texturing
+	if (dirtyUniforms & DIRTY_UVSCALEOFFSET) {
+		const float invW = 1.0f / (float)gstate_c.curTextureWidth;
+		const float invH = 1.0f / (float)gstate_c.curTextureHeight;
+		const int w = gstate.getTextureWidth(0);
+		const int h = gstate.getTextureHeight(0);
+		const float widthFactor = (float)w * invW;
+		const float heightFactor = (float)h * invH;
+		ub->uvScaleOffset[0] = widthFactor;
+		ub->uvScaleOffset[1] = heightFactor;
+		ub->uvScaleOffset[2] = 0.0f;  // The offset half of the vector is always written as zero here.
+		ub->uvScaleOffset[3] = 0.0f;
+	}
+
+	if (dirtyUniforms & DIRTY_DEPTHRANGE) {
+		float viewZScale = gstate.getViewportZScale();
+		float viewZCenter = gstate.getViewportZCenter();
+		float viewZInvScale;
+
+		// We had to scale and translate Z to account for our clamped Z range.
+		// Therefore, we also need to reverse this to round properly.
+		//
+		// Example: scale = 65535.0, center = 0.0
+		// Resulting range = -65535 to 65535, clamped to [0, 65535]
+		// gstate_c.vpDepthScale = 2.0f
+		// gstate_c.vpZOffset = -1.0f
+		//
+		// The projection already accounts for those, so we need to reverse them.
+		//
+		// Additionally, D3D9 uses a range from [0, 1].  We double and move the center.
+		viewZScale *= (1.0f / gstate_c.vpDepthScale) * 2.0f;
+		viewZCenter -= 65535.0f * gstate_c.vpZOffset + 32768.5f;
+
+		if (viewZScale != 0.0) {  // Guard the reciprocal; a zero scale yields a zero inverse scale.
+			viewZInvScale = 1.0f / viewZScale;
+		} else {
+			viewZInvScale = 0.0;
+		}
+
+		ub->depthRange[0] = viewZScale;
+		ub->depthRange[1] = viewZCenter;
+		ub->depthRange[2] = viewZCenter;  // Same value as [1]. NOTE(review): confirm the shaders expect the center in both slots.
+		ub->depthRange[3] = viewZInvScale;
+	}
+}
+
+void LightUpdateUniforms(UB_VS_Lights *ub, uint64_t dirtyUniforms) {  // Rewrites the material/ambient fields and the per-light fields of *ub whose DIRTY_* bit is set.
+	// Lighting
+	if (dirtyUniforms & DIRTY_AMBIENT) {
+		Uint8x3ToFloat4_AlphaUint8(ub->ambientColor, gstate.ambientcolor, gstate.getAmbientA());
+	}
+	if (dirtyUniforms & DIRTY_MATDIFFUSE) {
+		Uint8x3ToFloat4(ub->materialDiffuse, gstate.materialdiffuse);
+	}
+	if (dirtyUniforms & DIRTY_MATEMISSIVE) {
+		Uint8x3ToFloat4(ub->materialEmissive, gstate.materialemissive);
+	}
+	if (dirtyUniforms & DIRTY_MATSPECULAR) {
+		Uint8x3ToFloat4_Alpha(ub->materialSpecular, gstate.materialspecular, getFloat24(gstate.materialspecularcoef));  // The specular coefficient goes into the fourth component.
+	}
+
+	for (int i = 0; i < 4; i++) {  // Four lights, one dirty bit each: DIRTY_LIGHT0 << i.
+		if (dirtyUniforms & (DIRTY_LIGHT0 << i)) {
+			if (gstate.isDirectionalLight(i)) {
+				// Prenormalize
+				float x = getFloat24(gstate.lpos[i * 3 + 0]);
+				float y = getFloat24(gstate.lpos[i * 3 + 1]);
+				float z = getFloat24(gstate.lpos[i * 3 + 2]);
+				float len = sqrtf(x*x + y*y + z*z);
+				if (len == 0.0f)  // Avoid dividing by zero; a zero-length vector is stored unchanged.
+					len = 1.0f;
+				else
+					len = 1.0f / len;
+				float vec[3] = { x * len, y * len, z * len };
+				CopyFloat3To4(ub->lpos[i], vec);
+			} else {
+				ExpandFloat24x3ToFloat4(ub->lpos[i], &gstate.lpos[i * 3]);  // Non-directional lights keep their position as-is.
+			}
+			ExpandFloat24x3ToFloat4(ub->ldir[i], &gstate.ldir[i * 3]);  // Everything from here on is refreshed for both kinds of light.
+			ExpandFloat24x3ToFloat4(ub->latt[i], &gstate.latt[i * 3]);
+			CopyFloat1To4(ub->lightAngle[i], getFloat24(gstate.lcutoff[i]));
+			CopyFloat1To4(ub->lightSpotCoef[i], getFloat24(gstate.lconv[i]));
+			Uint8x3ToFloat4(ub->lightAmbient[i], gstate.lcolor[i * 3]);  // lcolor holds three entries per light: ambient, diffuse, specular.
+			Uint8x3ToFloat4(ub->lightDiffuse[i], gstate.lcolor[i * 3 + 1]);
+			Uint8x3ToFloat4(ub->lightSpecular[i], gstate.lcolor[i * 3 + 2]);
+		}
+	}
+}
+
+void BoneUpdateUniforms(UB_VS_Bones *ub, uint64_t dirtyUniforms) {
+	// Re-expand only the bones whose dirty bit is set; source matrices sit 12 floats apart.
+	for (int bone = 0; bone < 8; ++bone) {
+		if ((dirtyUniforms & (DIRTY_BONEMATRIX0 << bone)) != 0)
+			ConvertMatrix4x3To4x4(ub->bones[bone], gstate.boneMatrix + 12 * bone);
+	}
+}
--- a/GPU/Common/ShaderUniforms.h
+++ b/GPU/Common/ShaderUniforms.h
@ -0,0 +1,147 @@
+#pragma once
+
+#include <cstdint>
+
+#include "ShaderCommon.h"
+
+// Used by the "modern" backends that use uniform buffers. They can share this without issue.
+
+// Pretty much full. Will need more bits for more fine grained dirty tracking for lights.
+enum : uint64_t {
+	DIRTY_BASE_UNIFORMS =
+	DIRTY_WORLDMATRIX | DIRTY_PROJTHROUGHMATRIX | DIRTY_VIEWMATRIX | DIRTY_TEXMATRIX | DIRTY_ALPHACOLORREF |
+	DIRTY_PROJMATRIX | DIRTY_FOGCOLOR | DIRTY_FOGCOEF | DIRTY_TEXENV | DIRTY_STENCILREPLACEVALUE |
+	DIRTY_ALPHACOLORMASK | DIRTY_SHADERBLEND | DIRTY_UVSCALEOFFSET | DIRTY_TEXCLAMP | DIRTY_DEPTHRANGE | DIRTY_MATAMBIENTALPHA,
+	DIRTY_LIGHT_UNIFORMS =
+	DIRTY_LIGHT0 | DIRTY_LIGHT1 | DIRTY_LIGHT2 | DIRTY_LIGHT3 |
+	DIRTY_MATDIFFUSE | DIRTY_MATSPECULAR | DIRTY_MATEMISSIVE | DIRTY_AMBIENT,
+};
+
+// TODO: Split into two structs, one for software transform and one for hardware transform, to save space.
+// 512 bytes. Probably can't get to 256 (nVidia's UBO alignment).
+struct UB_VS_FS_Base {
+	float proj[16];
+	float proj_through[16];
+	float view[16];
+	float world[16];
+	float tex[16];  // not that common, may want to break out
+	float uvScaleOffset[4];
+	float depthRange[4];
+	float fogCoef_stencil[4];
+	float matAmbient[4];
+	// Fragment data
+	float fogColor[4];
+	float texEnvColor[4];
+	int alphaColorRef[4];
+	int colorTestMask[4];
+	float blendFixA[4];
+	float blendFixB[4];
+	float texClamp[4];
+	float texClampOffset[4];
+};
+
+static const char *ub_baseStr =
+R"(  mat4 proj_mtx;
+	mat4 proj_through_mtx;
+  mat4 view_mtx;
+  mat4 world_mtx;
+  mat4 tex_mtx;
+  vec4 uvscaleoffset;
+  vec4 depthRange;
+  vec3 fogcoef_stencilreplace;
+  vec4 matambientalpha;
+  vec3 fogcolor;
+  vec3 texenv;
+  ivec4 alphacolorref;
+  ivec4 alphacolormask;
+  vec3 blendFixA;
+  vec3 blendFixB;
+  vec4 texclamp;
+  vec2 texclampoff;
+)";
+
+static const char *cb_baseStr =  // HLSL constant-buffer members mirroring UB_VS_FS_Base, in the struct's field order.
+R"(  matrix proj_mtx;
+	matrix proj_through_mtx;
+  matrix view_mtx;
+  matrix world_mtx;
+  matrix tex_mtx;
+  float4 uvscaleoffset;
+  float4 depthRange;
+  float3 fogcoef_stencilreplace;
+  float4 matambientalpha;
+  float3 fogcolor;
+  float3 texenv;
+  int4 alphacolorref;
+  int4 alphacolormask;
+  float3 blendFixA;
+  float3 blendFixB;
+  float4 texclamp;
+  float2 texclampoff;
+)";
+
+// 576 bytes. Can we get down to 512?
+struct UB_VS_Lights {
+	float ambientColor[4];
+	float materialDiffuse[4];
+	float materialSpecular[4];
+	float materialEmissive[4];
+	float lpos[4][4];
+	float ldir[4][4];
+	float latt[4][4];
+	float lightAngle[4][4];   // TODO: Merge with lightSpotCoef, use .xy
+	float lightSpotCoef[4][4];
+	float lightAmbient[4][4];
+	float lightDiffuse[4][4];
+	float lightSpecular[4][4];
+};
+
+static const char *ub_vs_lightsStr =
+R"(	vec4 globalAmbient;
+	vec3 matdiffuse;
+	vec4 matspecular;
+	vec3 matemissive;
+	vec3 pos[4];
+	vec3 dir[4];
+	vec3 att[4];
+	float angle[4];
+	float spotCoef[4];
+	vec3 ambient[4];
+	vec3 diffuse[4];
+	vec3 specular[4];
+)";
+
+static const char *cb_vs_lightsStr =
+R"(	float4 globalAmbient;
+	float3 matdiffuse;
+	float4 matspecular;
+	float3 matemissive;
+	float3 pos[4];
+	float3 dir[4];
+	float3 att[4];
+	float angle[4];
+	float spotCoef[4];
+	float3 ambient[4];
+	float3 diffuse[4];
+	float3 specular[4];
+)";
+
+// With some cleverness, we could get away with uploading just half this when only the four first
+// bones are being used. This is 512b, 256b would be great.
+// Could also move to 4x3 matrices - would let us fit 5 bones into 256b.
+struct UB_VS_Bones {
+	float bones[8][16];
+};
+
+static const char *ub_vs_bonesStr =
+R"(	mat4 m[8];
+)";
+
+static const char *cb_vs_bonesStr =
+R"(	matrix m[8];
+)";
+
+void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms);
+void LightUpdateUniforms(UB_VS_Lights *ub, uint64_t dirtyUniforms);
+void BoneUpdateUniforms(UB_VS_Bones *ub, uint64_t dirtyUniforms);
+
--- a/GPU/D3D11/FragmentShaderGeneratorD3D11.cpp
+++ b/GPU/D3D11/FragmentShaderGeneratorD3D11.cpp
@ -0,0 +1,5 @@
+#include "GPU/D3D11/FragmentShaderGeneratorD3D11.h"
+
+void GenerateFragmentShaderD3D11(const ShaderID &id, char *buffer) {
+	buffer[0] = '\0';  // Stub: emit an empty, terminated string so callers that read `buffer` as text never see indeterminate bytes.
+}
--- a/GPU/D3D11/FragmentShaderGeneratorD3D11.h
+++ b/GPU/D3D11/FragmentShaderGeneratorD3D11.h
@ -0,0 +1,5 @@
+#pragma once
+
+#include "GPU/Common/ShaderId.h"
+
+void GenerateFragmentShaderD3D11(const ShaderID &id, char *buffer);
--- a/GPU/D3D11/ShaderManagerD3D11.cpp
+++ b/GPU/D3D11/ShaderManagerD3D11.cpp
@ -0,0 +1,273 @@
+// Copyright (c) 2015- PPSSPP Project.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 2.0 or later versions.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License 2.0 for more details.
+
+// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/
+
+// Official git repository and contact information can be found at
+// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
+
+#ifdef _WIN32
+#define SHADERLOG
+#endif
+
+#include <d3d11.h>
+#include <d3dcompiler.h>
+
+#include <map>
+
+#include "base/logging.h"
+#include "math/lin/matrix4x4.h"
+#include "math/math_util.h"
+#include "math/dataconv.h"
+#include "util/text/utf8.h"
+#include "thin3d/d3d11_loader.h"
+#include "Common/Common.h"
+#include "Core/Config.h"
+#include "Core/Reporting.h"
+#include "GPU/Math3D.h"
+#include "GPU/GPUState.h"
+#include "GPU/ge_constants.h"
+#include "GPU/D3D11/ShaderManagerD3D11.h"
+#include "GPU/D3D11/FragmentShaderGeneratorD3D11.h"
+#include "GPU/D3D11/VertexShaderGeneratorD3D11.h"
+
+D3D11FragmentShader::D3D11FragmentShader(ID3D11Device *device, ShaderID id, const char *code, bool useHWTransform)
+	: module_(nullptr), device_(device), source_(code), failed_(false), useHWTransform_(useHWTransform), id_(id) {
+	// Initializers are listed in member declaration order, which is the order they actually run in.
+#ifdef SHADERLOG
+	OutputDebugStringA(code);
+#endif
+
+	// TODO: Compile `code` into bytecode. Nothing produces bytecode yet, so start from a defined
+	// empty blob instead of handing indeterminate values to the driver.
+	const uint8_t *bytecode = nullptr;
+	UINT bytecodeSize = 0;
+
+	// Without bytecode there is nothing to create: report failure rather than calling the device.
+	HRESULT hr = bytecode ? device_->CreatePixelShader(bytecode, bytecodeSize, nullptr, &module_) : E_FAIL;
+	if (FAILED(hr)) {
+		failed_ = true;
+		return;
+	}
+}
+
+D3D11FragmentShader::~D3D11FragmentShader() {
+	if (module_)  // Only set when CreatePixelShader produced a shader.
+		module_->Release();
+}
+
+std::string D3D11FragmentShader::GetShaderString(DebugShaderStringType type) const {
+	switch (type) {  // Debug text for this shader; unknown request types yield "N/A".
+	case SHADER_STRING_SOURCE_CODE:
+		return source_;
+	case SHADER_STRING_SHORT_DESC:
+		return FragmentShaderDesc(id_);
+	default:
+		return "N/A";
+	}
+}
+
+D3D11VertexShader::D3D11VertexShader(ID3D11Device *device, ShaderID id, const char *code, int vertType, bool useHWTransform, bool usesLighting)
+	: module_(nullptr), device_(device), source_(code), failed_(false), useHWTransform_(useHWTransform), usesLighting_(usesLighting), id_(id) {
+	// Initializers are listed in member declaration order, which is the order they actually run in.
+#ifdef SHADERLOG
+	OutputDebugStringA(code);
+#endif
+	// TODO: Compile `code` into bytecode. Nothing produces bytecode yet, so start from a defined
+	// empty blob instead of handing indeterminate values to the driver.
+	const uint8_t *bytecode = nullptr;
+	UINT bytecodeSize = 0;
+	// Without bytecode there is nothing to create: report failure rather than calling the device.
+	HRESULT hr = bytecode ? device_->CreateVertexShader(bytecode, bytecodeSize, nullptr, &module_) : E_FAIL;
+	if (FAILED(hr)) {
+		failed_ = true;
+		return;
+	}
+}
+
+D3D11VertexShader::~D3D11VertexShader() {
+	if (module_)  // Only set when CreateVertexShader produced a shader.
+		module_->Release();
+}
+
+std::string D3D11VertexShader::GetShaderString(DebugShaderStringType type) const {
+	switch (type) {  // Debug text for this shader; unknown request types yield "N/A".
+	case SHADER_STRING_SOURCE_CODE:
+		return source_;
+	case SHADER_STRING_SHORT_DESC:
+		return VertexShaderDesc(id_);
+	default:
+		return "N/A";
+	}
+}
+
+ShaderManagerD3D11::ShaderManagerD3D11(ID3D11Device *device, ID3D11DeviceContext *context)
+	: device_(device), context_(context), lastFShader_(nullptr), lastVShader_(nullptr) {
+	codeBuffer_ = new char[16384]();  // Value-initialized (all zero), so it is a valid empty C string even if a generator writes nothing.
+	memset(&ub_base, 0, sizeof(ub_base));
+	memset(&ub_lights, 0, sizeof(ub_lights));
+	memset(&ub_bones, 0, sizeof(ub_bones));
+
+	ILOG("sizeof(ub_base): %d", (int)sizeof(ub_base));
+	ILOG("sizeof(ub_lights): %d", (int)sizeof(ub_lights));
+	ILOG("sizeof(ub_bones): %d", (int)sizeof(ub_bones));
+}
+
+ShaderManagerD3D11::~ShaderManagerD3D11() {
+	ClearShaders();  // Frees every cached shader object before the scratch buffer goes away.
+	delete[] codeBuffer_;
+}
+
+void ShaderManagerD3D11::Clear() {
+	// The caches hold raw owning pointers: free every shader before emptying the maps.
+	for (auto &entry : fsCache_)
+		delete entry.second;
+	for (auto &entry : vsCache_)
+		delete entry.second;
+	fsCache_.clear();
+	vsCache_.clear();
+	// Drop the remembered IDs as well.
+	lastFSID_.clear();
+	lastVSID_.clear();
+}
+
+void ShaderManagerD3D11::ClearShaders() {  // Frees all cached shaders, forgets the last-used pair and marks every uniform dirty.
+	Clear();
+	DirtyShader();
+	gstate_c.Dirty(DIRTY_ALL_UNIFORMS);
+}
+
+void ShaderManagerD3D11::DirtyShader() {
+	// Forget the last shader IDs and pointers, so the next GetShaders() goes through the cache lookup.
+	lastFSID_.clear();
+	lastVSID_.clear();
+	lastVShader_ = nullptr;
+	lastFShader_ = nullptr;
+}
+
+void ShaderManagerD3D11::DirtyLastShader() { // Forgets only the last-used shader pointers; the remembered IDs are kept.
+	lastVShader_ = nullptr;
+	lastFShader_ = nullptr;
+}
+
+uint64_t ShaderManagerD3D11::UpdateUniforms() {
+	// Snapshot the dirty bits, refresh each scratch block that has any of its bits set,
+	// then clear the dirty state. Returns the bits that were dirty on entry.
+	const uint64_t dirtyBits = gstate_c.GetDirtyUniforms();
+	if (dirtyBits & DIRTY_BASE_UNIFORMS)
+		BaseUpdateUniforms(&ub_base, dirtyBits);
+	if (dirtyBits & DIRTY_LIGHT_UNIFORMS)
+		LightUpdateUniforms(&ub_lights, dirtyBits);
+	if (dirtyBits & DIRTY_BONE_UNIFORMS)
+		BoneUpdateUniforms(&ub_bones, dirtyBits);
+	gstate_c.CleanUniforms();
+	return dirtyBits;
+}
+
+void ShaderManagerD3D11::GetShaders(int prim, u32 vertType, D3D11VertexShader **vshader, D3D11FragmentShader **fshader, bool useHWTransform) {
+	ShaderID vsId;
+	ShaderID fsId;
+	ComputeVertexShaderID(&vsId, vertType, useHWTransform);
+	ComputeFragmentShaderID(&fsId);
+
+	// Fast path: same pair of IDs as the previous call and the remembered pointers are still set.
+	const bool haveLast = lastVShader_ != nullptr && lastFShader_ != nullptr;
+	if (haveLast && vsId == lastVSID_ && fsId == lastFSID_) {
+		*vshader = lastVShader_;
+		*fshader = lastFShader_;
+		return;
+	}
+
+	// Vertex shader: reuse the cached object, otherwise generate the source and cache a new one.
+	D3D11VertexShader *vs = nullptr;
+	auto vsFound = vsCache_.find(vsId);
+	if (vsFound != vsCache_.end()) {
+		vs = vsFound->second;
+	} else {
+		bool usesLighting;
+		GenerateVertexShaderD3D11(vsId, codeBuffer_, &usesLighting);
+		vs = new D3D11VertexShader(device_, vsId, codeBuffer_, vertType, useHWTransform, usesLighting);
+		vsCache_[vsId] = vs;
+	}
+	lastVSID_ = vsId;
+
+	// Fragment shader: same lookup-or-create scheme, reusing the shared code buffer.
+	D3D11FragmentShader *fs = nullptr;
+	auto fsFound = fsCache_.find(fsId);
+	if (fsFound != fsCache_.end()) {
+		fs = fsFound->second;
+	} else {
+		GenerateFragmentShaderD3D11(fsId, codeBuffer_);
+		fs = new D3D11FragmentShader(device_, fsId, codeBuffer_, useHWTransform);
+		fsCache_[fsId] = fs;
+	}
+	lastFSID_ = fsId;
+
+	// Remember the pair for the fast path and hand it back to the caller.
+	lastVShader_ = vs;
+	lastFShader_ = fs;
+
+	*vshader = vs;
+	*fshader = fs;
+}
+
+std::vector<std::string> ShaderManagerD3D11::DebugGetShaderIDs(DebugShaderType type) {  // Lists the textual IDs of every cached shader of the requested type; other types give an empty list.
+	std::string id;
+	std::vector<std::string> ids;
+	switch (type) {
+	case SHADER_TYPE_VERTEX:
+	{
+		for (const auto &entry : vsCache_) {  // By reference: iterating by value copied every map entry.
+			entry.first.ToString(&id);
+			ids.push_back(id);
+		}
+		break;
+	}
+	case SHADER_TYPE_FRAGMENT:
+	{
+		for (const auto &entry : fsCache_) {
+			entry.first.ToString(&id);
+			ids.push_back(id);
+		}
+		break;
+	}
+	default:
+		break;
+	}
+	return ids;
+}
+
+std::string ShaderManagerD3D11::DebugGetShaderString(std::string id, DebugShaderType type, DebugShaderStringType stringType) {
+	// Parse the textual ID back into a cache key, then look it up in the cache
+	// that matches `type`.
+	ShaderID key;
+	key.FromString(id);
+
+	if (type == SHADER_TYPE_VERTEX) {
+		auto found = vsCache_.find(key);
+		if (found != vsCache_.end())
+			return found->second->GetShaderString(stringType);
+		// Unknown IDs yield an empty string.
+		return "";
+	}
+
+	if (type == SHADER_TYPE_FRAGMENT) {
+		auto found = fsCache_.find(key);
+		if (found != fsCache_.end())
+			return found->second->GetShaderString(stringType);
+		// Unknown IDs yield an empty string.
+		return "";
+	}
+
+	// Any other shader type has nothing to report.
+	return "N/A";
+}
--- a/GPU/D3D11/ShaderManagerD3D11.h
+++ b/GPU/D3D11/ShaderManagerD3D11.h
@ -0,0 +1,145 @@
+// Copyright (c) 2017- PPSSPP Project.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 2.0 or later versions.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License 2.0 for more details.
+
+// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/
+
+// Official git repository and contact information can be found at
+// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
+
+#pragma once
+
+#include <map>
+
+#include <d3d11.h>
+
+#include "base/basictypes.h"
+#include "Globals.h"
+#include "GPU/Common/ShaderCommon.h"
+#include "GPU/Common/ShaderId.h"
+// #include "GPU/DX9/VertexShaderGeneratorD3D11.h"
+// #include "GPU/DX9/FragmentShaderGeneratorD3D11.h"
+#include "math/lin/matrix4x4.h"
+#include "GPU/Common/ShaderUniforms.h"
+
+class D3D11Context;
+class D3D11PushBuffer;
+
+class D3D11FragmentShader {  // Holds one ID3D11PixelShader together with the source text and ShaderID it was built from.
+public:
+	D3D11FragmentShader(ID3D11Device *device, ShaderID id, const char *code, bool useHWTransform);
+	~D3D11FragmentShader();
+
+	const std::string &source() const { return source_; }
+
+	bool Failed() const { return failed_; }  // True when shader creation did not succeed.
+	bool UseHWTransform() const { return useHWTransform_; }
+
+	std::string GetShaderString(DebugShaderStringType type) const;
+	ID3D11PixelShader *GetShader() const { return module_; }  // NOTE(review): D3D11VertexShader names the equivalent accessor GetModule().
+
+protected:
+	ID3D11PixelShader *module_;  // Released in the destructor when non-null.
+
+	ID3D11Device *device_;
+	std::string source_;
+	bool failed_;
+	bool useHWTransform_;
+	ShaderID id_;
+};
+
+class D3D11VertexShader {  // Holds one ID3D11VertexShader together with the source text and ShaderID it was built from.
+public:
+	D3D11VertexShader(ID3D11Device *device, ShaderID id, const char *code, int vertType, bool useHWTransform, bool usesLighting);
+	~D3D11VertexShader();
+
+	const std::string &source() const { return source_; }
+
+	bool Failed() const { return failed_; }  // True when shader creation did not succeed.
+	bool UseHWTransform() const { return useHWTransform_; }
+	bool HasBones() const {
+		return id_.Bit(VS_BIT_ENABLE_BONES);  // Derived from the shader ID rather than stored separately.
+	}
+	bool HasLights() const {
+		return usesLighting_;  // Value passed to the constructor.
+	}
+
+	std::string GetShaderString(DebugShaderStringType type) const;
+	ID3D11VertexShader *GetModule() const { return module_; }
+
+protected:
+	ID3D11VertexShader *module_;  // Released in the destructor when non-null.
+
+	ID3D11Device *device_;
+	std::string source_;
+	bool failed_;
+	bool useHWTransform_;
+	bool usesLighting_;
+	ShaderID id_;
+};
+
+class D3D11PushBuffer;
+
+class ShaderManagerD3D11 : public ShaderManagerCommon {  // Caches vertex/fragment shaders by ShaderID and keeps the uniform-block scratch copies.
+public:
+	ShaderManagerD3D11(ID3D11Device *device, ID3D11DeviceContext *context);
+	~ShaderManagerD3D11();
+
+	void GetShaders(int prim, u32 vertType, D3D11VertexShader **vshader, D3D11FragmentShader **fshader, bool useHWTransform);  // Returns the cached pair for the current state, creating and caching missing shaders.
+	void ClearShaders();
+	void DirtyShader();
+	void DirtyLastShader();
+
+	int GetNumVertexShaders() const { return (int)vsCache_.size(); }
+	int GetNumFragmentShaders() const { return (int)fsCache_.size(); }
+
+	std::vector<std::string> DebugGetShaderIDs(DebugShaderType type);
+	std::string DebugGetShaderString(std::string id, DebugShaderType type, DebugShaderStringType stringType);
+
+	uint64_t UpdateUniforms();  // Refreshes the scratch blocks below and returns the bits that were dirty.
+
+	// TODO: Avoid copying these buffers if same as last draw, can still point to it assuming we're still in the same pushbuffer.
+	// Placeholders for now: each of these unconditionally reports its block as dirty.
+	bool IsBaseDirty() { return true; }
+	bool IsLightDirty() { return true; }
+	bool IsBoneDirty() { return true; }
+
+	/*
+	uint32_t PushBaseBuffer(D3D11PushBuffer *dest, VkBuffer *buf);
+	uint32_t PushLightBuffer(D3D11PushBuffer *dest, VkBuffer *buf);
+	uint32_t PushBoneBuffer(D3D11PushBuffer *dest, VkBuffer *buf);
+	*/
+
+private:
+	void Clear();  // Deletes every cached shader and empties both caches.
+
+	ID3D11Device *device_;
+	ID3D11DeviceContext *context_;
+
+	typedef std::map<ShaderID, D3D11FragmentShader *> FSCache;
+	FSCache fsCache_;  // Owns its shader objects; they are deleted in Clear().
+
+	typedef std::map<ShaderID, D3D11VertexShader *> VSCache;
+	VSCache vsCache_;  // Owns its shader objects; they are deleted in Clear().
+
+	char *codeBuffer_;  // Scratch buffer the shader generators write source text into; allocated in the constructor.
+
+	// Uniform block scratchpad. These (the relevant ones) are copied to the current pushbuffer at draw time.
+	UB_VS_FS_Base ub_base;
+	UB_VS_Lights ub_lights;
+	UB_VS_Bones ub_bones;
+
+	D3D11FragmentShader *lastFShader_;  // Last pair handed out by GetShaders(); null after DirtyShader()/DirtyLastShader().
+	D3D11VertexShader *lastVShader_;
+
+	ShaderID lastFSID_;
+	ShaderID lastVSID_;
+};
--- a/GPU/D3D11/StateMappingD3D11.cpp
+++ b/GPU/D3D11/StateMappingD3D11.cpp
@ -0,0 +1,379 @@
+// Copyright (c) 2012- PPSSPP Project.
+
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, version 2.0 or later versions.
+
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License 2.0 for more details.
+
+// A copy of the GPL 2.0 should have been included with the program.
+// If not, see http://www.gnu.org/licenses/
+
+// Official git repository and contact information can be found at
+// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
+
+#include <d3d11.h>
+
+#include "GPU/Math3D.h"
+#include "GPU/GPUState.h"
+#include "GPU/ge_constants.h"
+#include "GPU/Common/GPUStateUtils.h"
+#include "Core/System.h"
+#include "Core/Config.h"
+#include "Core/Reporting.h"
+
+#include "GPU/Common/FramebufferCommon.h"
+
+// These tables all fit into u8s.
+// Maps a BlendFactor value (used directly as the array index) to the matching D3D11_BLEND
+// constant. The entry order must therefore follow the BlendFactor enum exactly.
+static const D3D11_BLEND d3d11BlendFactorLookup[(size_t)BlendFactor::COUNT] = {
+	D3D11_BLEND_ZERO,
+	D3D11_BLEND_ONE,
+	D3D11_BLEND_SRC_COLOR,
+	D3D11_BLEND_INV_SRC_COLOR,
+	D3D11_BLEND_DEST_COLOR,
+	D3D11_BLEND_INV_DEST_COLOR,
+	D3D11_BLEND_SRC_ALPHA,
+	D3D11_BLEND_INV_SRC_ALPHA,
+	D3D11_BLEND_DEST_ALPHA,
+	D3D11_BLEND_INV_DEST_ALPHA,
+	// The blend-factor pair is listed twice on purpose.
+	// NOTE(review): presumably the constant-color and constant-alpha BlendFactor values both map
+	// to the single D3D11 blend factor - verify against the BlendFactor enum.
+	D3D11_BLEND_BLEND_FACTOR,
+	D3D11_BLEND_INV_BLEND_FACTOR,
+	D3D11_BLEND_BLEND_FACTOR,
+	D3D11_BLEND_INV_BLEND_FACTOR,
+	D3D11_BLEND_SRC1_COLOR,
+	D3D11_BLEND_INV_SRC1_COLOR,
+	D3D11_BLEND_SRC1_ALPHA,
+	D3D11_BLEND_INV_SRC1_ALPHA,
+};
+
+// Maps a BlendEq value (used directly as the array index) to the matching D3D11_BLEND_OP.
+// The entry order must follow the BlendEq enum exactly.
+static const D3D11_BLEND_OP d3d11BlendEqLookup[(size_t)BlendEq::COUNT] = {
+	D3D11_BLEND_OP_ADD,
+	D3D11_BLEND_OP_SUBTRACT,
+	D3D11_BLEND_OP_REV_SUBTRACT,
+	D3D11_BLEND_OP_MIN,
+	D3D11_BLEND_OP_MAX,
+};
+
+// Cull mode table: index 0 -> cull back faces, index 1 -> cull front faces.
+// NOTE(review): nothing in this file reads this table; ConvertStateToKeys selects the same
+// constants inline from gstate.getCullMode().
+static const D3D11_CULL_MODE cullingMode[] = {
+	D3D11_CULL_BACK,
+	D3D11_CULL_FRONT,
+};
+
+// Comparison function table. ConvertStateToKeys indexes it with gstate.getDepthTestFunction()
+// for the depth test and with stencilState.testFunc for the stencil test, so the entry order
+// must follow the GE comparison function numbering.
+static const D3D11_COMPARISON_FUNC compareOps[] = {
+	D3D11_COMPARISON_NEVER,
+	D3D11_COMPARISON_ALWAYS,
+	D3D11_COMPARISON_EQUAL,
+	D3D11_COMPARISON_NOT_EQUAL,
+	D3D11_COMPARISON_LESS,
+	D3D11_COMPARISON_LESS_EQUAL,
+	D3D11_COMPARISON_GREATER,
+	D3D11_COMPARISON_GREATER_EQUAL,
+};
+
+// Stencil operation table. ConvertStateToKeys indexes it with the zPass / sFail / zFail values
+// from GenericStencilFuncState. The last two slots are reserved and fall back to KEEP.
+static const D3D11_STENCIL_OP stencilOps[] = {
+	D3D11_STENCIL_OP_KEEP,
+	D3D11_STENCIL_OP_ZERO,
+	D3D11_STENCIL_OP_REPLACE,
+	D3D11_STENCIL_OP_INVERT,
+	D3D11_STENCIL_OP_INCR_SAT,
+	D3D11_STENCIL_OP_DECR_SAT,
+	D3D11_STENCIL_OP_KEEP, // reserved
+	D3D11_STENCIL_OP_KEEP, // reserved
+};
+
+// Maps the primitive type passed to ConvertStateToKeys (used as the index) to a D3D11 topology.
+// The array is declared with 8 elements but only 7 are listed; the last element is therefore
+// zero-initialized, which is D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED.
+static const D3D11_PRIMITIVE_TOPOLOGY primToD3D11[8] = {
+	D3D11_PRIMITIVE_TOPOLOGY_POINTLIST,
+	D3D11_PRIMITIVE_TOPOLOGY_LINELIST,
+	D3D11_PRIMITIVE_TOPOLOGY_LINESTRIP,
+	D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST,
+	D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP,
+	D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED,  // D3D11 doesn't do triangle fans.
+	// NOTE(review): presumably the rectangle primitive, expanded to triangles elsewhere - confirm.
+	D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST,
+};
+
+// These are actually the same exact values/order/etc. as the GE ones, but for clarity...
+/*
+static const D3D11_LOGIC_OP logicOps[] = {
+	D3D11_LOGIC_OP_CLEAR,
+	D3D11_LOGIC_OP_AND,
+	D3D11_LOGIC_OP_AND_REVERSE,
+	D3D11_LOGIC_OP_COPY,
+	D3D11_LOGIC_OP_AND_INVERTED,
+	D3D11_LOGIC_OP_NO_OP,
+	D3D11_LOGIC_OP_XOR,
+	D3D11_LOGIC_OP_OR,
+	D3D11_LOGIC_OP_NOR,
+	D3D11_LOGIC_OP_EQUIVALENT,
+	D3D11_LOGIC_OP_INVERT,
+	D3D11_LOGIC_OP_OR_REVERSE,
+	D3D11_LOGIC_OP_COPY_INVERTED,
+	D3D11_LOGIC_OP_OR_INVERTED,
+	D3D11_LOGIC_OP_NAND,
+	D3D11_LOGIC_OP_SET,
+};
+*/
+
+// Stub: shader-based blending is not implemented in this backend yet, so this always reports
+// failure. ConvertStateToKeys reacts by forcing gstate_c.allowShaderBlend off.
+static bool ApplyShaderBlending() {
+	return false;
+}
+
+// Stub: there is no shader-blending state to reset yet, so this does nothing.
+static void ResetShaderBlending() {
+	//
+}
+
+class FramebufferManagerD3D11;
+class ShaderManagerD3D11;
+
+// TODO: Do this more progressively. No need to compute the entire state if the entire state hasn't changed.
+// In Vulkan, we simply collect all the state together into a "pipeline key" - we don't actually set any state here
+// (the caller is responsible for setting the little dynamic state that is supported, dynState).
+
+// Bit-packed description of the blend state, filled in by ConvertStateToKeys.
+// The factor and op fields store raw D3D11 enum values. The declared widths add up to 36 bits,
+// so the key occupies more than one 32-bit unit.
+struct D3D11BlendKey {
+	// Blend
+	unsigned int blendEnable : 1;
+	unsigned int srcColor : 5;  // D3D11_BLEND
+	unsigned int destColor : 5;  // D3D11_BLEND
+	unsigned int srcAlpha : 5;  // D3D11_BLEND
+	unsigned int destAlpha : 5;  // D3D11_BLEND
+	unsigned int blendOpColor : 3;  // D3D11_BLEND_OP
+	unsigned int blendOpAlpha : 3;  // D3D11_BLEND_OP
+	unsigned int logicOpEnable : 1;
+	unsigned int logicOp : 4;  // D3D11_LOGIC_OP (never assigned in this file; the lookup is commented out)
+	unsigned int colorWriteMask : 4;  // bit 0 = R, bit 1 = G, bit 2 = B, bit 3 = A
+};
+
+// Bit-packed description of the depth/stencil state, filled in by ConvertStateToKeys.
+// All op fields store raw D3D11 enum values. D3D11_COMPARISON_FUNC runs from 1 (NEVER) to
+// 8 (ALWAYS) and D3D11_STENCIL_OP from 1 (KEEP) to 8 (DECR), so each of them needs 4 bits;
+// a 3-bit field would silently truncate D3D11_COMPARISON_ALWAYS to 0, which is not a valid
+// comparison function. The widths add up to 23 bits, so the key still fits one 32-bit unit.
+struct D3D11DepthStencilKey {
+	// Depth/Stencil
+	unsigned int depthTestEnable : 1;
+	unsigned int depthWriteEnable : 1;
+	unsigned int depthCompareOp : 4;  // D3D11_COMPARISON_FUNC
+	unsigned int stencilTestEnable : 1;
+	unsigned int stencilCompareOp : 4;  // D3D11_COMPARISON_FUNC
+	unsigned int stencilPassOp : 4; // D3D11_STENCIL_OP
+	unsigned int stencilFailOp : 4; // D3D11_STENCIL_OP
+	unsigned int stencilDepthFailOp : 4;  // D3D11_STENCIL_OP
+};
+
+// Bit-packed description of the rasterizer state, filled in by ConvertStateToKeys.
+struct D3D11RasterKey {
+	unsigned int cullMode : 2;  // D3D11_CULL_MODE (NONE, FRONT or BACK)
+};
+
+// In D3D11 we cache blend state objects etc, and we simply emit keys, which are then also used to create these objects.
+// Bundle of the three state keys produced by one ConvertStateToKeys call.
+// ConvertStateToKeys zero-fills the whole struct with memset before setting any field.
+struct D3D11StateKeys {
+	D3D11BlendKey blend;
+	D3D11DepthStencilKey depthStencil;
+	D3D11RasterKey raster;
+};
+
+// State that is not part of the cached keys; ConvertStateToKeys fills it in per call and
+// zero-fills it with memset first.
+struct D3D11DynamicState {
+	int topology;  // D3D11_PRIMITIVE_TOPOLOGY value taken from primToD3D11
+	bool useBlendColor;  // blendColor is only written when this is true
+	uint32_t blendColor;
+	bool useStencil;  // the stencil fields below are only written when this is true
+	uint8_t stencilRef;
+	uint8_t stencilWriteMask;
+	uint8_t stencilCompareMask;  // not written in clear mode, so it keeps the memset value of 0
+	D3D11_VIEWPORT viewport;
+	D3D11_RECT scissor;
+};
+
+// Translates the current GE state (gstate / gstate_c) into D3D11 state keys and dynamic state.
+//
+//   fbManager     - supplies render/target buffer dimensions for the viewport conversion and
+//                   is notified through SetDepthUpdated() whenever depth writes are enabled.
+//   shaderManager - currently unused.
+//   prim          - primitive type; rectangles are never culled, and the value indexes
+//                   primToD3D11 to pick the topology.
+//   key           - output; zero-filled, then populated with blend, depth/stencil and raster keys.
+//   dynState      - output; zero-filled, then populated with topology, blend color, stencil
+//                   reference/masks, viewport and scissor.
+//
+// Side effects: forces gstate_c.allowShaderBlend off and may mark DIRTY_SHADERBLEND,
+// DIRTY_PROJMATRIX and DIRTY_DEPTHRANGE on gstate_c.
+void ConvertStateToKeys(FramebufferManagerCommon *fbManager, ShaderManagerD3D11 *shaderManager, int prim, D3D11StateKeys &key, D3D11DynamicState &dynState) {
+	memset(&key, 0, sizeof(key));
+	memset(&dynState, 0, sizeof(dynState));
+	// Unfortunately, this isn't implemented yet.
+	gstate_c.allowShaderBlend = false;
+
+	// Set blend - unless we need to do it in the shader.
+	GenericBlendState blendState;
+	ConvertBlendState(blendState, gstate_c.allowShaderBlend);
+
+	bool useBufferedRendering = g_Config.iRenderingMode != FB_NON_BUFFERED_MODE;
+
+	ViewportAndScissor vpAndScissor;
+	ConvertViewportAndScissor(useBufferedRendering,
+		fbManager->GetRenderWidth(), fbManager->GetRenderHeight(),
+		fbManager->GetTargetBufferWidth(), fbManager->GetTargetBufferHeight(),
+		vpAndScissor);
+
+	if (blendState.applyShaderBlending) {
+		if (ApplyShaderBlending()) {
+			// We may still want to do something about stencil -> alpha.
+			ApplyStencilReplaceAndLogicOp(blendState.replaceAlphaWithStencil, blendState);
+		} else {
+			// Until next time, force it off.
+			ResetShaderBlending();
+			gstate_c.allowShaderBlend = false;
+		}
+	} else if (blendState.resetShaderBlending) {
+		ResetShaderBlending();
+	}
+
+	if (blendState.enabled) {
+		key.blend.blendEnable = true;
+		key.blend.blendOpColor = d3d11BlendEqLookup[(size_t)blendState.eqColor];
+		key.blend.blendOpAlpha = d3d11BlendEqLookup[(size_t)blendState.eqAlpha];
+		key.blend.srcColor = d3d11BlendFactorLookup[(size_t)blendState.srcColor];
+		key.blend.srcAlpha = d3d11BlendFactorLookup[(size_t)blendState.srcAlpha];
+		key.blend.destColor = d3d11BlendFactorLookup[(size_t)blendState.dstColor];
+		key.blend.destAlpha = d3d11BlendFactorLookup[(size_t)blendState.dstAlpha];
+		if (blendState.dirtyShaderBlend) {
+			gstate_c.Dirty(DIRTY_SHADERBLEND);
+		}
+		dynState.useBlendColor = blendState.useBlendColor;
+		if (blendState.useBlendColor) {
+			dynState.blendColor = blendState.blendColor;
+		}
+	} else {
+		key.blend.blendEnable = false;
+		dynState.useBlendColor = false;
+	}
+
+	dynState.useStencil = false;
+
+	// Set ColorMask/Stencil/Depth
+	if (gstate.isModeClear()) {
+		key.blend.logicOpEnable = false;
+		key.raster.cullMode = D3D11_CULL_NONE;
+
+		key.depthStencil.depthTestEnable = true;
+		key.depthStencil.depthCompareOp = D3D11_COMPARISON_ALWAYS;
+		key.depthStencil.depthWriteEnable = gstate.isClearModeDepthMask();
+		if (gstate.isClearModeDepthMask()) {
+			fbManager->SetDepthUpdated();
+		}
+
+		// Color Test
+		bool colorMask = gstate.isClearModeColorMask();
+		bool alphaMask = gstate.isClearModeAlphaMask();
+		key.blend.colorWriteMask = (colorMask ? (1 | 2 | 4) : 0) | (alphaMask ? 8 : 0);
+
+		// Stencil Test
+		if (alphaMask) {
+			key.depthStencil.stencilTestEnable = true;
+			key.depthStencil.stencilCompareOp = D3D11_COMPARISON_ALWAYS;
+			key.depthStencil.stencilPassOp = D3D11_STENCIL_OP_REPLACE;
+			key.depthStencil.stencilFailOp = D3D11_STENCIL_OP_REPLACE;
+			key.depthStencil.stencilDepthFailOp = D3D11_STENCIL_OP_REPLACE;
+			dynState.useStencil = true;
+			// In clear mode, the stencil value is set to the alpha value of the vertex.
+			// A normal clear will be 2 points, the second point has the color.
+			// We override this value in the pipeline from software transform for clear rectangles.
+			dynState.stencilRef = 0xFF;
+			dynState.stencilWriteMask = 0xFF;
+		} else {
+			key.depthStencil.stencilTestEnable = false;
+			dynState.useStencil = false;
+		}
+	} else {
+		if (gstate_c.Supports(GPU_SUPPORTS_LOGIC_OP)) {
+			// Logic Ops
+			if (gstate.isLogicOpEnabled() && gstate.getLogicOp() != GE_LOGIC_COPY) {
+				key.blend.logicOpEnable = true;
+				// The logicOps lookup table is still commented out, so key.blend.logicOp stays 0.
+				// key.blendKey.logicOp = logicOps[gstate.getLogicOp()];
+			} else {
+				key.blend.logicOpEnable = false;
+			}
+		}
+
+		// Set cull
+		bool wantCull = !gstate.isModeThrough() && prim != GE_PRIM_RECTANGLES && gstate.isCullEnabled();
+		key.raster.cullMode = wantCull ? (gstate.getCullMode() ? D3D11_CULL_FRONT : D3D11_CULL_BACK) : D3D11_CULL_NONE;
+
+		// Depth Test
+		if (gstate.isDepthTestEnabled()) {
+			key.depthStencil.depthTestEnable = true;
+			key.depthStencil.depthCompareOp = compareOps[gstate.getDepthTestFunction()];
+			key.depthStencil.depthWriteEnable = gstate.isDepthWriteEnabled();
+			if (gstate.isDepthWriteEnabled()) {
+				fbManager->SetDepthUpdated();
+			}
+		} else {
+			key.depthStencil.depthTestEnable = false;
+			key.depthStencil.depthWriteEnable = false;
+			key.depthStencil.depthCompareOp = D3D11_COMPARISON_ALWAYS;
+		}
+
+		// PSP color/alpha mask is per bit but we can only support per byte.
+		// But let's do that, at least. And let's try a threshold.
+		bool rmask = (gstate.pmskc & 0xFF) < 128;
+		bool gmask = ((gstate.pmskc >> 8) & 0xFF) < 128;
+		bool bmask = ((gstate.pmskc >> 16) & 0xFF) < 128;
+		bool amask = (gstate.pmska & 0xFF) < 128;
+
+#ifndef MOBILE_DEVICE
+		// Report (once) any mask that is neither fully on nor fully off, since those get rounded.
+		u8 abits = (gstate.pmska >> 0) & 0xFF;
+		u8 rbits = (gstate.pmskc >> 0) & 0xFF;
+		u8 gbits = (gstate.pmskc >> 8) & 0xFF;
+		u8 bbits = (gstate.pmskc >> 16) & 0xFF;
+		if ((rbits != 0 && rbits != 0xFF) || (gbits != 0 && gbits != 0xFF) || (bbits != 0 && bbits != 0xFF)) {
+			WARN_LOG_REPORT_ONCE(rgbmask, G3D, "Unsupported RGB mask: r=%02x g=%02x b=%02x", rbits, gbits, bbits);
+		}
+		if (abits != 0 && abits != 0xFF) {
+			// The stencil part of the mask is supported.
+			WARN_LOG_REPORT_ONCE(amask, G3D, "Unsupported alpha/stencil mask: %02x", abits);
+		}
+#endif
+
+		// Let's not write to alpha if stencil isn't enabled.
+		if (!gstate.isStencilTestEnabled()) {
+			amask = false;
+		} else {
+			// If the stencil type is set to KEEP, we shouldn't write to the stencil/alpha channel.
+			if (ReplaceAlphaWithStencilType() == STENCIL_VALUE_KEEP) {
+				amask = false;
+			}
+		}
+
+		key.blend.colorWriteMask = (rmask ? 1 : 0) | (gmask ? 2 : 0) | (bmask ? 4 : 0) | (amask ? 8 : 0);
+
+		GenericStencilFuncState stencilState;
+		ConvertStencilFuncState(stencilState);
+
+		// Stencil Test
+		if (stencilState.enabled) {
+			key.depthStencil.stencilTestEnable = true;
+			key.depthStencil.stencilCompareOp = compareOps[stencilState.testFunc];
+			key.depthStencil.stencilPassOp = stencilOps[stencilState.zPass];
+			key.depthStencil.stencilFailOp = stencilOps[stencilState.sFail];
+			key.depthStencil.stencilDepthFailOp = stencilOps[stencilState.zFail];
+			dynState.useStencil = true;
+			dynState.stencilRef = stencilState.testRef;
+			dynState.stencilCompareMask = stencilState.testMask;
+			dynState.stencilWriteMask = stencilState.writeMask;
+		} else {
+			key.depthStencil.stencilTestEnable = false;
+			dynState.useStencil = false;
+		}
+	}
+
+	dynState.topology = primToD3D11[prim];
+
+	// D3D11 requires viewport MinDepth/MaxDepth to lie within [0, 1], so clamp the converted
+	// range before handing it to the viewport instead of passing it through unmodified.
+	float depthMin = vpAndScissor.depthRangeMin;
+	float depthMax = vpAndScissor.depthRangeMax;
+	if (depthMin < 0.0f) depthMin = 0.0f;
+	if (depthMax > 1.0f) depthMax = 1.0f;
+
+	D3D11_VIEWPORT &vp = dynState.viewport;
+	vp.TopLeftX = vpAndScissor.viewportX;
+	vp.TopLeftY = vpAndScissor.viewportY;
+	vp.Width = vpAndScissor.viewportW;
+	vp.Height = vpAndScissor.viewportH;
+	vp.MinDepth = depthMin;
+	vp.MaxDepth = depthMax;
+	if (vpAndScissor.dirtyProj) {
+		gstate_c.Dirty(DIRTY_PROJMATRIX);
+	}
+
+	// D3D11_RECT stores edges rather than a size, so convert x/y/w/h to left/top/right/bottom.
+	D3D11_RECT &scissor = dynState.scissor;
+	scissor.left = vpAndScissor.scissorX;
+	scissor.top = vpAndScissor.scissorY;
+	scissor.right = vpAndScissor.scissorX + vpAndScissor.scissorW;
+	scissor.bottom = vpAndScissor.scissorY + vpAndScissor.scissorH;
+
+	if (vpAndScissor.dirtyDepth) {
+		gstate_c.Dirty(DIRTY_DEPTHRANGE);
+	}
+}
--- a/GPU/D3D11/VertexShaderGeneratorD3D11.cpp
+++ b/GPU/D3D11/VertexShaderGeneratorD3D11.cpp
@ -0,0 +1,5 @@
+#include "GPU/D3D11/VertexShaderGeneratorD3D11.h"
+
+// Stub: the D3D11 vertex shader generator is not implemented yet. Nothing is written to
+// `buffer` or `*usesLighting`, so callers must not rely on either after this returns.
+void GenerateVertexShaderD3D11(const ShaderID &id, char *buffer, bool *usesLighting) {
+
+}
--- a/GPU/D3D11/VertexShaderGeneratorD3D11.h
+++ b/GPU/D3D11/VertexShaderGeneratorD3D11.h
@ -0,0 +1,5 @@
+#pragma once
+
+#include "GPU/Common/ShaderId.h"
+
+// Generates D3D11 vertex shader source for the given shader ID.
+// NOTE(review): presumably writes the source into `buffer` and reports through `*usesLighting`
+// whether the shader reads the lighting uniforms - confirm against the implementation.
+void GenerateVertexShaderD3D11(const ShaderID &id, char *buffer, bool *usesLighting);
--- a/GPU/Directx9/PixelShaderGeneratorDX9.h
+++ b/GPU/Directx9/PixelShaderGeneratorDX9.h
@ -17,8 +17,6 @@

 #pragma once

-#include "Globals.h"
-
 #include "GPU/Common/ShaderId.h"

 namespace DX9 {
--- a/GPU/GPU.vcxproj
+++ b/GPU/GPU.vcxproj
@ -83,7 +83,7 @@
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
    <ClCompile>
      <WarningLevel>Level3</WarningLevel>
-      <AdditionalIncludeDirectories>../common;..;../ext;../ext/native;../ext/glew;../ext/snappy;</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>..\dx9sdk\Include\DX11;../common;..;../ext;../ext/native;../ext/glew;../ext/snappy;</AdditionalIncludeDirectories>
      <PreprocessorDefinitions>_CRTDBG_MAP_ALLOC;USING_WIN_UI;_CRT_SECURE_NO_WARNINGS;WIN32;_ARCH_32=1;_M_IX86=1;_DEBUG;_LIB;_UNICODE;UNICODE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <EnableEnhancedInstructionSet>StreamingSIMDExtensions2</EnableEnhancedInstructionSet>
      <FloatingPointModel>Fast</FloatingPointModel>
@ -105,7 +105,7 @@
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
    <ClCompile>
      <WarningLevel>Level3</WarningLevel>
-      <AdditionalIncludeDirectories>../common;..;../ext;../ext/native;../ext/glew;../ext/snappy;</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>..\dx9sdk\Include\DX11;../common;..;../ext;../ext/native;../ext/glew;../ext/snappy;</AdditionalIncludeDirectories>
      <EnableEnhancedInstructionSet>NotSet</EnableEnhancedInstructionSet>
      <FloatingPointModel>Fast</FloatingPointModel>
      <OmitFramePointers>false</OmitFramePointers>
@ -131,7 +131,7 @@
      <Optimization>MaxSpeed</Optimization>
      <FunctionLevelLinking>true</FunctionLevelLinking>
      <IntrinsicFunctions>true</IntrinsicFunctions>
-      <AdditionalIncludeDirectories>../common;..;../ext;../ext/native;../ext/glew;../ext/snappy;</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>..\dx9sdk\Include\DX11;../common;..;../ext;../ext/native;../ext/glew;../ext/snappy;</AdditionalIncludeDirectories>
      <BufferSecurityCheck>false</BufferSecurityCheck>
      <EnableEnhancedInstructionSet>StreamingSIMDExtensions2</EnableEnhancedInstructionSet>
      <FloatingPointModel>Fast</FloatingPointModel>
@ -157,7 +157,7 @@
      <Optimization>MaxSpeed</Optimization>
      <FunctionLevelLinking>true</FunctionLevelLinking>
      <IntrinsicFunctions>true</IntrinsicFunctions>
-      <AdditionalIncludeDirectories>../common;..;../ext;../ext/native;../ext/glew;../ext/snappy;</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>..\dx9sdk\Include\DX11;../common;..;../ext;../ext/native;../ext/glew;../ext/snappy;</AdditionalIncludeDirectories>
      <BufferSecurityCheck>false</BufferSecurityCheck>
      <EnableEnhancedInstructionSet>NotSet</EnableEnhancedInstructionSet>
      <FloatingPointModel>Fast</FloatingPointModel>
@ -190,6 +190,7 @@
    <ClInclude Include="Common\PostShader.h" />
    <ClInclude Include="Common\ShaderCommon.h" />
    <ClInclude Include="Common\ShaderId.h" />
+    <ClInclude Include="Common\ShaderUniforms.h" />
    <ClInclude Include="Common\SoftwareTransformCommon.h" />
    <ClInclude Include="Common\SplineCommon.h" />
    <ClInclude Include="Common\TextureDecoderNEON.h">
@ -202,6 +203,9 @@
    <ClInclude Include="Common\TextureScalerCommon.h" />
    <ClInclude Include="Common\TransformCommon.h" />
    <ClInclude Include="Common\VertexDecoderCommon.h" />
+    <ClInclude Include="D3D11\FragmentShaderGeneratorD3D11.h" />
+    <ClInclude Include="D3D11\ShaderManagerD3D11.h" />
+    <ClInclude Include="D3D11\VertexShaderGeneratorD3D11.h" />
    <ClInclude Include="Debugger\Breakpoints.h" />
    <ClInclude Include="Debugger\Stepping.h" />
    <ClInclude Include="Directx9\DepalettizeShaderDX9.h" />
@ -261,6 +265,7 @@
    <ClCompile Include="Common\IndexGenerator.cpp" />
    <ClCompile Include="Common\PostShader.cpp" />
    <ClCompile Include="Common\ShaderId.cpp" />
+    <ClCompile Include="Common\ShaderUniforms.cpp" />
    <ClCompile Include="Common\SplineCommon.cpp" />
    <ClCompile Include="Common\TextureDecoderNEON.cpp">
      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
@ -286,6 +291,10 @@
    </ClCompile>
    <ClCompile Include="Common\VertexDecoderCommon.cpp" />
    <ClCompile Include="Common\VertexDecoderX86.cpp" />
+    <ClCompile Include="D3D11\FragmentShaderGeneratorD3D11.cpp" />
+    <ClCompile Include="D3D11\ShaderManagerD3D11.cpp" />
+    <ClCompile Include="D3D11\StateMappingD3D11.cpp" />
+    <ClCompile Include="D3D11\VertexShaderGeneratorD3D11.cpp" />
    <ClCompile Include="Debugger\Breakpoints.cpp" />
    <ClCompile Include="Debugger\Stepping.cpp" />
    <ClCompile Include="Directx9\DepalettizeShaderDX9.cpp" />
@ -344,4 +353,4 @@
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
  <ImportGroup Label="ExtensionTargets">
  </ImportGroup>
-</Project>
+</Project>
--- a/GPU/GPU.vcxproj.filters
+++ b/GPU/GPU.vcxproj.filters
@ -25,6 +25,9 @@
    <Filter Include="Vulkan">
      <UniqueIdentifier>{3c621896-140c-4c8b-8e4d-a478bfdeca8a}</UniqueIdentifier>
    </Filter>
+    <Filter Include="D3D11">
+      <UniqueIdentifier>{88eb5cea-ec25-4881-89da-02f9f2fa8f3f}</UniqueIdentifier>
+    </Filter>
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="ge_constants.h">
@ -222,6 +225,18 @@
    <ClInclude Include="GLES\FragmentTestCacheGLES.h">
      <Filter>GLES</Filter>
    </ClInclude>
+    <ClInclude Include="Common\ShaderUniforms.h">
+      <Filter>Common</Filter>
+    </ClInclude>
+    <ClInclude Include="D3D11\ShaderManagerD3D11.h">
+      <Filter>D3D11</Filter>
+    </ClInclude>
+    <ClInclude Include="D3D11\VertexShaderGeneratorD3D11.h">
+      <Filter>D3D11</Filter>
+    </ClInclude>
+    <ClInclude Include="D3D11\FragmentShaderGeneratorD3D11.h">
+      <Filter>D3D11</Filter>
+    </ClInclude>
  </ItemGroup>
  <ItemGroup>
    <ClCompile Include="Math3D.cpp">
@ -428,5 +443,20 @@
    <ClCompile Include="GLES\FragmentTestCacheGLES.cpp">
      <Filter>GLES</Filter>
    </ClCompile>
+    <ClCompile Include="D3D11\StateMappingD3D11.cpp">
+      <Filter>D3D11</Filter>
+    </ClCompile>
+    <ClCompile Include="Common\ShaderUniforms.cpp">
+      <Filter>Common</Filter>
+    </ClCompile>
+    <ClCompile Include="D3D11\ShaderManagerD3D11.cpp">
+      <Filter>D3D11</Filter>
+    </ClCompile>
+    <ClCompile Include="D3D11\VertexShaderGeneratorD3D11.cpp">
+      <Filter>D3D11</Filter>
+    </ClCompile>
+    <ClCompile Include="D3D11\FragmentShaderGeneratorD3D11.cpp">
+      <Filter>D3D11</Filter>
+    </ClCompile>
  </ItemGroup>
-</Project>
+</Project>
--- a/GPU/Vulkan/ShaderManagerVulkan.cpp
+++ b/GPU/Vulkan/ShaderManagerVulkan.cpp
@ -150,12 +150,6 @@ std::string VulkanVertexShader::GetShaderString(DebugShaderStringType type) cons
 	}
 }

-static void ConvertProjMatrixToVulkan(Matrix4x4 &in, bool invertedX, bool invertedY) {
-	const Vec3 trans(0, 0, gstate_c.vpZOffset * 0.5f + 0.5f);
-	const Vec3 scale(gstate_c.vpWidthScale, gstate_c.vpHeightScale, gstate_c.vpDepthScale * 0.5f);
-	in.translateAndScale(trans, scale);
-}
-
 ShaderManagerVulkan::ShaderManagerVulkan(VulkanContext *vulkan)
 	: vulkan_(vulkan), lastVShader_(nullptr), lastFShader_(nullptr) {
 	codeBuffer_ = new char[16384];
@ -187,207 +181,6 @@ uint32_t ShaderManagerVulkan::PushBoneBuffer(VulkanPushBuffer *dest, VkBuffer *b
 	return dest->PushAligned(&ub_bones, sizeof(ub_bones), uboAlignment_, buf);
 }

-void ShaderManagerVulkan::BaseUpdateUniforms(uint64_t dirtyUniforms) {
-	if (dirtyUniforms & DIRTY_TEXENV) {
-		Uint8x3ToFloat4(ub_base.texEnvColor, gstate.texenvcolor);
-	}
-	if (dirtyUniforms & DIRTY_ALPHACOLORREF) {
-		Uint8x3ToInt4_Alpha(ub_base.alphaColorRef, gstate.getColorTestRef(), gstate.getAlphaTestRef() & gstate.getAlphaTestMask());
-	}
-	if (dirtyUniforms & DIRTY_ALPHACOLORMASK) {
-		Uint8x3ToInt4_Alpha(ub_base.colorTestMask, gstate.getColorTestMask(), gstate.getAlphaTestMask());
-	}
-	if (dirtyUniforms & DIRTY_FOGCOLOR) {
-		Uint8x3ToFloat4(ub_base.fogColor, gstate.fogcolor);
-	}
-	if (dirtyUniforms & DIRTY_SHADERBLEND) {
-		Uint8x3ToFloat4(ub_base.blendFixA, gstate.getFixA());
-		Uint8x3ToFloat4(ub_base.blendFixB, gstate.getFixB());
-	}
-	if (dirtyUniforms & DIRTY_TEXCLAMP) {
-		const float invW = 1.0f / (float)gstate_c.curTextureWidth;
-		const float invH = 1.0f / (float)gstate_c.curTextureHeight;
-		const int w = gstate.getTextureWidth(0);
-		const int h = gstate.getTextureHeight(0);
-		const float widthFactor = (float)w * invW;
-		const float heightFactor = (float)h * invH;
-
-		// First wrap xy, then half texel xy (for clamp.)
-		ub_base.texClamp[0] = widthFactor;
-		ub_base.texClamp[1] = heightFactor;
-		ub_base.texClamp[2] = invW * 0.5f;
-		ub_base.texClamp[3] = invH * 0.5f;
-		ub_base.texClampOffset[0] = gstate_c.curTextureXOffset * invW;
-		ub_base.texClampOffset[1] = gstate_c.curTextureYOffset * invH;
-	}
-
-	if (dirtyUniforms & DIRTY_PROJMATRIX) {
-		Matrix4x4 flippedMatrix;
-		memcpy(&flippedMatrix, gstate.projMatrix, 16 * sizeof(float));
-
-		const bool invertedY = gstate_c.vpHeight < 0;
-		if (invertedY) {
-			flippedMatrix[1] = -flippedMatrix[1];
-			flippedMatrix[5] = -flippedMatrix[5];
-			flippedMatrix[9] = -flippedMatrix[9];
-			flippedMatrix[13] = -flippedMatrix[13];
-		}
-		const bool invertedX = gstate_c.vpWidth < 0;
-		if (invertedX) {
-			flippedMatrix[0] = -flippedMatrix[0];
-			flippedMatrix[4] = -flippedMatrix[4];
-			flippedMatrix[8] = -flippedMatrix[8];
-			flippedMatrix[12] = -flippedMatrix[12];
-		}
-		ConvertProjMatrixToVulkan(flippedMatrix, invertedX, invertedY);
-		CopyMatrix4x4(ub_base.proj, flippedMatrix.getReadPtr());
-	}
-
-	if (dirtyUniforms & DIRTY_PROJTHROUGHMATRIX) {
-		Matrix4x4 proj_through;
-		proj_through.setOrthoVulkan(0.0f, gstate_c.curRTWidth, 0, gstate_c.curRTHeight, 0, 1);
-		CopyMatrix4x4(ub_base.proj_through, proj_through.getReadPtr());
-	}
-
-	// Transform
-	if (dirtyUniforms & DIRTY_WORLDMATRIX) {
-		ConvertMatrix4x3To4x4(ub_base.world, gstate.worldMatrix);
-	}
-	if (dirtyUniforms & DIRTY_VIEWMATRIX) {
-		ConvertMatrix4x3To4x4(ub_base.view, gstate.viewMatrix);
-	}
-	if (dirtyUniforms & DIRTY_TEXMATRIX) {
-		ConvertMatrix4x3To4x4(ub_base.tex, gstate.tgenMatrix);
-	}
-
-	// Combined two small uniforms
-	if (dirtyUniforms & (DIRTY_FOGCOEF | DIRTY_STENCILREPLACEVALUE)) {
-		float fogcoef_stencil[3] = {
-			getFloat24(gstate.fog1),
-			getFloat24(gstate.fog2),
-			(float)gstate.getStencilTestRef()
-		};
-		if (my_isinf(fogcoef_stencil[1])) {
-			// not really sure what a sensible value might be.
-			fogcoef_stencil[1] = fogcoef_stencil[1] < 0.0f ? -10000.0f : 10000.0f;
-		} else if (my_isnan(fogcoef_stencil[1])) {
-			// Workaround for https://github.com/hrydgard/ppsspp/issues/5384#issuecomment-38365988
-			// Just put the fog far away at a large finite distance.
-			// Infinities and NaNs are rather unpredictable in shaders on many GPUs
-			// so it's best to just make it a sane calculation.
-			fogcoef_stencil[0] = 100000.0f;
-			fogcoef_stencil[1] = 1.0f;
-		}
-#ifndef MOBILE_DEVICE
-		else if (my_isnanorinf(fogcoef_stencil[1]) || my_isnanorinf(fogcoef_stencil[0])) {
-			ERROR_LOG_REPORT_ONCE(fognan, G3D, "Unhandled fog NaN/INF combo: %f %f", fogcoef_stencil[0], fogcoef_stencil[1]);
-		}
-#endif
-		CopyFloat3(ub_base.fogCoef_stencil, fogcoef_stencil);
-	}
-
-	// Note - this one is not in lighting but in transformCommon as it has uses beyond lighting
-	if (dirtyUniforms & DIRTY_MATAMBIENTALPHA) {
-		Uint8x3ToFloat4_AlphaUint8(ub_base.matAmbient, gstate.materialambient, gstate.getMaterialAmbientA());
-	}
-
-	// Texturing
-	if (dirtyUniforms & DIRTY_UVSCALEOFFSET) {
-		const float invW = 1.0f / (float)gstate_c.curTextureWidth;
-		const float invH = 1.0f / (float)gstate_c.curTextureHeight;
-		const int w = gstate.getTextureWidth(0);
-		const int h = gstate.getTextureHeight(0);
-		const float widthFactor = (float)w * invW;
-		const float heightFactor = (float)h * invH;
-		ub_base.uvScaleOffset[0] = widthFactor;
-		ub_base.uvScaleOffset[1] = heightFactor;
-		ub_base.uvScaleOffset[2] = 0.0f;
-		ub_base.uvScaleOffset[3] = 0.0f;
-	}
-
-	if (dirtyUniforms & DIRTY_DEPTHRANGE) {
-		float viewZScale = gstate.getViewportZScale();
-		float viewZCenter = gstate.getViewportZCenter();
-		float viewZInvScale;
-
-		// We had to scale and translate Z to account for our clamped Z range.
-		// Therefore, we also need to reverse this to round properly.
-		//
-		// Example: scale = 65535.0, center = 0.0
-		// Resulting range = -65535 to 65535, clamped to [0, 65535]
-		// gstate_c.vpDepthScale = 2.0f
-		// gstate_c.vpZOffset = -1.0f
-		//
-		// The projection already accounts for those, so we need to reverse them.
-		//
-		// Additionally, D3D9 uses a range from [0, 1].  We double and move the center.
-		viewZScale *= (1.0f / gstate_c.vpDepthScale) * 2.0f;
-		viewZCenter -= 65535.0f * gstate_c.vpZOffset + 32768.5f;
-
-		if (viewZScale != 0.0) {
-			viewZInvScale = 1.0f / viewZScale;
-		} else {
-			viewZInvScale = 0.0;
-		}
-
-		ub_base.depthRange[0] = viewZScale;
-		ub_base.depthRange[1] = viewZCenter;
-		ub_base.depthRange[2] = viewZCenter;
-		ub_base.depthRange[3] = viewZInvScale;
-	}
-}
-
-void ShaderManagerVulkan::LightUpdateUniforms(uint64_t dirtyUniforms) {
-	// Lighting
-	if (dirtyUniforms & DIRTY_AMBIENT) {
-		Uint8x3ToFloat4_AlphaUint8(ub_lights.ambientColor, gstate.ambientcolor, gstate.getAmbientA());
-	}
-	if (dirtyUniforms & DIRTY_MATDIFFUSE) {
-		Uint8x3ToFloat4(ub_lights.materialDiffuse, gstate.materialdiffuse);
-	}
-	if (dirtyUniforms & DIRTY_MATEMISSIVE) {
-		Uint8x3ToFloat4(ub_lights.materialEmissive, gstate.materialemissive);
-	}
-	if (dirtyUniforms & DIRTY_MATSPECULAR) {
-		Uint8x3ToFloat4_Alpha(ub_lights.materialSpecular, gstate.materialspecular, getFloat24(gstate.materialspecularcoef));
-	}
-
-	for (int i = 0; i < 4; i++) {
-		if (dirtyUniforms & (DIRTY_LIGHT0 << i)) {
-			if (gstate.isDirectionalLight(i)) {
-				// Prenormalize
-				float x = getFloat24(gstate.lpos[i * 3 + 0]);
-				float y = getFloat24(gstate.lpos[i * 3 + 1]);
-				float z = getFloat24(gstate.lpos[i * 3 + 2]);
-				float len = sqrtf(x*x + y*y + z*z);
-				if (len == 0.0f)
-					len = 1.0f;
-				else
-					len = 1.0f / len;
-				float vec[3] = { x * len, y * len, z * len };
-				CopyFloat3To4(ub_lights.lpos[i], vec);
-			} else {
-				ExpandFloat24x3ToFloat4(ub_lights.lpos[i], &gstate.lpos[i * 3]);
-			}
-			ExpandFloat24x3ToFloat4(ub_lights.ldir[i], &gstate.ldir[i * 3]);
-			ExpandFloat24x3ToFloat4(ub_lights.latt[i], &gstate.latt[i * 3]);
-			CopyFloat1To4(ub_lights.lightAngle[i], getFloat24(gstate.lcutoff[i]));
-			CopyFloat1To4(ub_lights.lightSpotCoef[i], getFloat24(gstate.lconv[i]));
-			Uint8x3ToFloat4(ub_lights.lightAmbient[i], gstate.lcolor[i * 3]);
-			Uint8x3ToFloat4(ub_lights.lightDiffuse[i], gstate.lcolor[i * 3 + 1]);
-			Uint8x3ToFloat4(ub_lights.lightSpecular[i], gstate.lcolor[i * 3 + 2]);
-		}
-	}
-}
-
-void ShaderManagerVulkan::BoneUpdateUniforms(uint64_t dirtyUniforms) {
-	for (int i = 0; i < 8; i++) {
-		if (dirtyUniforms & (DIRTY_BONEMATRIX0 << i)) {
-			ConvertMatrix4x3To4x4(ub_bones.bones[i], gstate.boneMatrix + 12 * i);
-		}
-	}
-}
-
 void ShaderManagerVulkan::DeviceRestore(VulkanContext *vulkan) {
 	vulkan_ = vulkan;
 	uboAlignment_ = vulkan_->GetPhysicalDeviceProperties().limits.minUniformBufferOffsetAlignment;
@ -429,11 +222,11 @@ uint64_t ShaderManagerVulkan::UpdateUniforms() {
 	uint64_t dirty = gstate_c.GetDirtyUniforms();
 	if (dirty != 0) {
 		if (dirty & DIRTY_BASE_UNIFORMS)
-			BaseUpdateUniforms(dirty);
+			BaseUpdateUniforms(&ub_base, dirty);
 		if (dirty & DIRTY_LIGHT_UNIFORMS)
-			LightUpdateUniforms(dirty);
+			LightUpdateUniforms(&ub_lights, dirty);
 		if (dirty & DIRTY_BONE_UNIFORMS)
-			BoneUpdateUniforms(dirty);
+			BoneUpdateUniforms(&ub_bones, dirty);
 	}
 	gstate_c.CleanUniforms();
 	return dirty;
--- a/GPU/Vulkan/ShaderManagerVulkan.h
+++ b/GPU/Vulkan/ShaderManagerVulkan.h
@ -27,104 +27,7 @@
 #include "GPU/Vulkan/FragmentShaderGeneratorVulkan.h"
 #include "GPU/Vulkan/VulkanUtil.h"
 #include "math/lin/matrix4x4.h"
-
-void ConvertProjMatrixToVulkan(Matrix4x4 & in);
-
-// Pretty much full. Will need more bits for more fine grained dirty tracking for lights.
-enum : uint64_t {
-	DIRTY_BASE_UNIFORMS = 
-		DIRTY_WORLDMATRIX | DIRTY_PROJTHROUGHMATRIX | DIRTY_VIEWMATRIX | DIRTY_TEXMATRIX | DIRTY_ALPHACOLORREF |
-		DIRTY_PROJMATRIX | DIRTY_FOGCOLOR | DIRTY_FOGCOEF | DIRTY_TEXENV | DIRTY_STENCILREPLACEVALUE | 
-		DIRTY_ALPHACOLORMASK | DIRTY_SHADERBLEND | DIRTY_UVSCALEOFFSET | DIRTY_TEXCLAMP | DIRTY_DEPTHRANGE | DIRTY_MATAMBIENTALPHA,
-	DIRTY_LIGHT_UNIFORMS =
-		DIRTY_LIGHT0 | DIRTY_LIGHT1 | DIRTY_LIGHT2 | DIRTY_LIGHT3 |
-		DIRTY_MATDIFFUSE | DIRTY_MATSPECULAR | DIRTY_MATEMISSIVE | DIRTY_AMBIENT,
-};
-
-// TODO: Split into two structs, one for software transform and one for hardware transform, to save space.
-// 512 bytes. Probably can't get to 256 (nVidia's UBO alignment).
-struct UB_VS_FS_Base {
-	float proj[16];
-	float proj_through[16];
-	float view[16];
-	float world[16];
-	float tex[16];  // not that common, may want to break out
-	float uvScaleOffset[4];
-	float depthRange[4];
-	float fogCoef_stencil[4];
-	float matAmbient[4];
-	// Fragment data
-	float fogColor[4];
-	float texEnvColor[4];
-	int alphaColorRef[4];
-	int colorTestMask[4];
-	float blendFixA[4];
-	float blendFixB[4];
-	float texClamp[4];
-	float texClampOffset[4];
-};
-
-static const char *ub_baseStr =
-R"(  mat4 proj_mtx;
-	mat4 proj_through_mtx;
-  mat4 view_mtx;
-  mat4 world_mtx;
-  mat4 tex_mtx;
-  vec4 uvscaleoffset;
-  vec4 depthRange;
-  vec3 fogcoef_stencilreplace;
-  vec4 matambientalpha;
-  vec3 fogcolor;
-  vec3 texenv;
-  ivec4 alphacolorref;
-  ivec4 alphacolormask;
-  vec3 blendFixA;
-  vec3 blendFixB;
-  vec4 texclamp;
-  vec2 texclampoff;
-)";
-
-// 576 bytes. Can we get down to 512?
-struct UB_VS_Lights {
-	float ambientColor[4];
-	float materialDiffuse[4];
-	float materialSpecular[4];
-	float materialEmissive[4];
-	float lpos[4][4];
-	float ldir[4][4];
-	float latt[4][4];
-	float lightAngle[4][4];   // TODO: Merge with lightSpotCoef, use .xy
-	float lightSpotCoef[4][4];
-	float lightAmbient[4][4];
-	float lightDiffuse[4][4];
-	float lightSpecular[4][4];
-};
-
-static const char *ub_vs_lightsStr =
-R"(	vec4 globalAmbient;
-	vec3 matdiffuse;
-	vec4 matspecular;
-	vec3 matemissive;
-	vec3 pos[4];
-	vec3 dir[4];
-	vec3 att[4];
-	float angle[4];
-	float spotCoef[4];
-	vec3 ambient[4];
-	vec3 diffuse[4];
-	vec3 specular[4];
-)";
-
-// With some cleverness, we could get away with uploading just half this when only the four first
-// bones are being used. This is 512b, 256b would be great.
-// Could also move to 4x3 matrices - would let us fit 5 bones into 256b.
-struct UB_VS_Bones {
-	float bones[8][16];
-};
-
-static const char *ub_vs_bonesStr =
-R"(	mat4 m[8];
-)";
+#include "GPU/Common/ShaderUniforms.h"

 class VulkanContext;
 class VulkanPushBuffer;
@ -215,10 +118,6 @@ public:
 	uint32_t PushBoneBuffer(VulkanPushBuffer *dest, VkBuffer *buf);

 private:
-	void BaseUpdateUniforms(uint64_t dirtyUniforms);
-	void LightUpdateUniforms(uint64_t dirtyUniforms);
-	void BoneUpdateUniforms(uint64_t dirtyUniforms);
-
 	void Clear();

 	VulkanContext *vulkan_;