Merge pull request #11425 from xebra/refactor_spline_bezier

[Refactoring] Improve spline/bezier.
This commit is contained in:
Henrik Rydgård 2018-11-04 17:04:29 +01:00 committed by GitHub
commit 22c066515e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
32 changed files with 1485 additions and 1786 deletions

View File

@ -34,7 +34,6 @@ enum {
};
DrawEngineCommon::DrawEngineCommon() : decoderMap_(16) {
quadIndices_ = new u16[6 * QUAD_INDICES_MAX];
decJitCache_ = new VertexDecoderJitCache();
transformed = (TransformedVertex *)AllocateMemoryPages(TRANSFORMED_VERTEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
transformedExpanded = (TransformedVertex *)AllocateMemoryPages(3 * TRANSFORMED_VERTEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
@ -43,11 +42,11 @@ DrawEngineCommon::DrawEngineCommon() : decoderMap_(16) {
DrawEngineCommon::~DrawEngineCommon() {
FreeMemoryPages(transformed, TRANSFORMED_VERTEX_BUFFER_SIZE);
FreeMemoryPages(transformedExpanded, 3 * TRANSFORMED_VERTEX_BUFFER_SIZE);
delete[] quadIndices_;
delete decJitCache_;
decoderMap_.Iterate([&](const uint32_t vtype, VertexDecoder *decoder) {
delete decoder;
});
ClearSplineBezierWeights();
}
VertexDecoder *DrawEngineCommon::GetVertexDecoder(u32 vtype) {
@ -739,3 +738,25 @@ void DrawEngineCommon::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim,
}
}
}
void TessellationDataTransfer::CopyControlPoints(float *pos, float *tex, float *col, int posStride, int texStride, int colStride, const SimpleVertex *const *points, int size, u32 vertType) {
bool hasColor = (vertType & GE_VTYPE_COL_MASK) != 0;
bool hasTexCoord = (vertType & GE_VTYPE_TC_MASK) != 0;
for (int i = 0; i < size; ++i) {
memcpy(pos, points[i]->pos.AsArray(), 3 * sizeof(float));
pos += posStride;
}
if (hasTexCoord) {
for (int i = 0; i < size; ++i) {
memcpy(tex, points[i]->uv, 2 * sizeof(float));
tex += texStride;
}
}
if (hasColor) {
for (int i = 0; i < size; ++i) {
memcpy(col, Vec4f::FromRGBA(points[i]->color_32).AsArray(), 4 * sizeof(float));
col += colStride;
}
}
}

View File

@ -34,7 +34,6 @@ enum {
VERTEX_BUFFER_MAX = 65536,
DECODED_VERTEX_BUFFER_SIZE = VERTEX_BUFFER_MAX * 64,
DECODED_INDEX_BUFFER_SIZE = VERTEX_BUFFER_MAX * 16,
SPLINE_BUFFER_SIZE = VERTEX_BUFFER_MAX * 26, // At least, this buffer needs greater than 1679616 bytes for Mist Dragon morphing in FF4CC.
};
// Avoiding the full include of TextureDecoder.h.
@ -50,6 +49,15 @@ inline uint32_t GetVertTypeID(uint32_t vertType, int uvGenMode) {
return (vertType & 0xFFFFFF) | (uvGenMode << 24);
}
struct SimpleVertex;
namespace Spline { struct Weight2D; }
// Backend-independent interface for handing spline/bezier control points (and their
// weights) to the vertex shader for hardware tessellation.
class TessellationDataTransfer {
public:
	// This is a polymorphic base class, so the destructor must be virtual: otherwise
	// deleting a backend implementation through a TessellationDataTransfer pointer
	// is undefined behavior.
	virtual ~TessellationDataTransfer() {}
	// Helper shared by backends: copies position/texcoord/color of `size` control points
	// into the given float buffers, using the given strides (in floats).
	void CopyControlPoints(float *pos, float *tex, float *col, int posStride, int texStride, int colStride, const SimpleVertex *const *points, int size, u32 vertType);
	// Implemented per backend: uploads the size_u * size_v control points and the weights.
	virtual void SendDataToShader(const SimpleVertex *const *points, int size_u, int size_v, u32 vertType, const Spline::Weight2D &weights) = 0;
};
class DrawEngineCommon {
public:
DrawEngineCommon();
@ -75,6 +83,7 @@ public:
void SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int cullMode, int *bytesRead);
void SubmitSpline(const void *control_points, const void *indices, int tess_u, int tess_v, int count_u, int count_v, int type_u, int type_v, GEPatchPrimType prim_type, bool computeNormals, bool patchFacing, u32 vertType, int *bytesRead);
void SubmitBezier(const void *control_points, const void *indices, int tess_u, int tess_v, int count_u, int count_v, GEPatchPrimType prim_type, bool computeNormals, bool patchFacing, u32 vertType, int *bytesRead);
void ClearSplineBezierWeights();
std::vector<std::string> DebugGetVertexLoaderIDs();
std::string DebugGetVertexLoaderString(std::string id, DebugShaderStringType stringType);
@ -160,31 +169,10 @@ protected:
int decodedVerts_ = 0;
GEPrimitiveType prevPrim_ = GE_PRIM_INVALID;
// Fixed index buffer for easy quad generation from spline/bezier
u16 *quadIndices_ = nullptr;
// Shader blending state
bool fboTexNeedBind_ = false;
bool fboTexBound_ = false;
// Hardware tessellation
int numPatches;
class TessellationDataTransfer {
protected:
// TODO: These aren't used by all backends.
int prevSize;
int prevSizeTex;
int prevSizeCol;
public:
virtual ~TessellationDataTransfer() {}
// Send spline/bezier's control points to vertex shader through floating point texture.
virtual void PrepareBuffers(float *&pos, float *&tex, float *&col, int &posStride, int &texStride, int &colStride, int size, bool hasColor, bool hasTexCoords) {
posStride = 4;
texStride = 4;
colStride = 4;
}
virtual void SendDataToShader(const float *pos, const float *tex, const float *col, int size, bool hasColor, bool hasTexCoords) = 0;
virtual void EndFrame() {}
};
TessellationDataTransfer *tessDataTransfer;
};

View File

@ -53,6 +53,7 @@ std::string VertexShaderDesc(const ShaderID &id) {
if (id.Bit(VS_BIT_SPLINE)) desc << "Spline ";
if (id.Bit(VS_BIT_HAS_COLOR_TESS)) desc << "TessC ";
if (id.Bit(VS_BIT_HAS_TEXCOORD_TESS)) desc << "TessT ";
if (id.Bit(VS_BIT_HAS_NORMAL_TESS)) desc << "TessN ";
if (id.Bit(VS_BIT_NORM_REVERSE_TESS)) desc << "TessRevN ";
return desc.str();
@ -73,6 +74,7 @@ void ComputeVertexShaderID(ShaderID *id_out, u32 vertType, bool useHWTransform)
bool doSpline = gstate_c.spline;
bool hasColorTess = (gstate.vertType & GE_VTYPE_COL_MASK) != 0 && (doBezier || doSpline);
bool hasTexcoordTess = (gstate.vertType & GE_VTYPE_TC_MASK) != 0 && (doBezier || doSpline);
bool hasNormalTess = (gstate.vertType & GE_VTYPE_NRM_MASK) != 0 && (doBezier || doSpline);
bool enableFog = gstate.isFogEnabled() && !isModeThrough && !gstate.isModeClear();
bool lmode = gstate.isUsingSecondaryColor() && gstate.isLightingEnabled() && !isModeThrough;
@ -139,6 +141,7 @@ void ComputeVertexShaderID(ShaderID *id_out, u32 vertType, bool useHWTransform)
id.SetBit(VS_BIT_SPLINE, doSpline);
id.SetBit(VS_BIT_HAS_COLOR_TESS, hasColorTess);
id.SetBit(VS_BIT_HAS_TEXCOORD_TESS, hasTexcoordTess);
id.SetBit(VS_BIT_HAS_NORMAL_TESS, hasNormalTess);
id.SetBit(VS_BIT_NORM_REVERSE_TESS, gstate.isPatchNormalsReversed());
}
}

View File

@ -24,7 +24,7 @@ enum {
VS_BIT_HAS_COLOR_TESS = 12, // 1 bit
VS_BIT_HAS_TEXCOORD_TESS = 13, // 1 bit
VS_BIT_NORM_REVERSE_TESS = 14, // 1 bit
// 15 is free.
VS_BIT_HAS_NORMAL_TESS = 15, // 1 bit
VS_BIT_UVGEN_MODE = 16,
VS_BIT_UVPROJ_MODE = 18, // 2, can overlap with LS0
VS_BIT_LS0 = 18, // 2

View File

@ -240,7 +240,7 @@ void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipView
}
if (dirtyUniforms & DIRTY_BEZIERSPLINE) {
ub->spline_counts = BytesToUint32(gstate_c.spline_count_u, gstate_c.spline_count_v, gstate_c.spline_type_u, gstate_c.spline_type_v);
ub->spline_counts = gstate_c.spline_num_points_u;
}
if (dirtyUniforms & DIRTY_DEPAL) {

File diff suppressed because it is too large Load Diff

View File

@ -16,11 +16,15 @@
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#pragma once
#include <unordered_map>
#include "Common/CommonTypes.h"
#include "Common/Swap.h"
#include "GPU/Math3D.h"
#include "GPU/ge_constants.h"
#include "Core/Config.h"
#define HALF_CEIL(x) (((x) + 1) / 2) // Integer ceil = (int)ceil((float)x / 2.0f). Fully parenthesized so it is safe inside larger expressions.
// PSP compatible format so we can use the end of the pipeline in beziers etc
struct SimpleVertex {
@ -33,32 +37,11 @@ struct SimpleVertex {
Vec3Packedf pos;
};
// We decode all vertices into a common format for easy interpolation and stuff.
// Not fast but can be optimized later.
struct BezierPatch {
const SimpleVertex *points[16];
class SimpleBufferManager;
// These are used to generate UVs.
int u_index, v_index;
namespace Spline {
int index;
GEPatchPrimType primType;
bool computeNormals;
bool patchFacing;
};
struct SplinePatchLocal {
const SimpleVertex **points;
int tess_u;
int tess_v;
int count_u;
int count_v;
int type_u;
int type_v;
bool computeNormals;
bool patchFacing;
GEPatchPrimType primType;
};
void BuildIndex(u16 *indices, int &count, int num_u, int num_v, GEPatchPrimType prim_type, int total = 0);
enum SplineQuality {
LOW_QUALITY = 0,
@ -66,6 +49,207 @@ enum SplineQuality {
HIGH_QUALITY = 2,
};
class Bezier3DWeight;
class Spline3DWeight;
// We decode all vertices into a common format for easy interpolation and stuff.
// Not fast but can be optimized later.
// Parameters common to bezier and spline surfaces: tessellation factors, control point
// and patch counts, per-axis type, primitive type and patch facing.
struct SurfaceInfo {
	int tess_u, tess_v;
	int num_points_u, num_points_v;
	int num_patches_u, num_patches_v;
	int type_u, type_v;
	GEPatchPrimType primType;
	bool patchFacing;

	// Sanitizes the tessellation factors and applies the configured quality setting.
	void Init() {
		// A factor below 1 (e.g. specified as 0) is treated as 1.
		tess_u = (tess_u < 1) ? 1 : tess_u;
		tess_v = (tess_v < 1) ? 1 : tess_v;

		const int quality = g_Config.iSplineBezierQuality;
		if (quality == LOW_QUALITY) {
			// Low quality always uses a fixed 2x2 tessellation.
			tess_u = 2;
			tess_v = 2;
		} else if (quality == MEDIUM_QUALITY) {
			// Halve (rounding up), but never cut a factor that is already 2 or less.
			if (tess_u > 2)
				tess_u = HALF_CEIL(tess_u);
			if (tess_v > 2)
				tess_v = HALF_CEIL(tess_v);
		}
		// Any other quality value keeps the requested factors.
	}
};
// Surface description for bezier patches. Every patch produces its own grid of
// (tess_u + 1) * (tess_v + 1) vertices, stored patch after patch.
struct BezierSurface : public SurfaceInfo {
	using WeightType = Bezier3DWeight;

	int num_verts_per_patch;

	// Prepares the surface so that the total vertex count does not exceed maxVertices.
	void Init(int maxVertices) {
		SurfaceInfo::Init();
		// Downsample until it fits, in case crazy tessellation factors are sent.
		for (;;) {
			if ((tess_u + 1) * (tess_v + 1) * num_patches_u * num_patches_v <= maxVertices)
				break;
			--tess_u;
			--tess_v;
		}
		num_verts_per_patch = (tess_u + 1) * (tess_v + 1);
	}

	// Every bezier patch tessellates starting from tile 0.
	int GetTessStart(int patch) const {
		return 0;
	}

	// Index of the first control point of a patch; neighbouring patches are 3 points apart.
	int GetPointIndex(int patch_u, int patch_v) const {
		return patch_v * 3 * num_points_u + patch_u * 3;
	}

	// Per-patch vertex coordinates are simply the tile coordinates.
	int GetIndexU(int patch_u, int tile_u) const {
		return tile_u;
	}

	int GetIndexV(int patch_v, int tile_v) const {
		return tile_v;
	}

	// Output vertex index: position inside the patch grid plus the patch's base offset.
	int GetIndex(int index_u, int index_v, int patch_u, int patch_v) const {
		const int patch_index = patch_v * num_patches_u + patch_u;
		return index_v * (tess_u + 1) + index_u + num_verts_per_patch * patch_index;
	}

	// Appends the indices of every patch to `indices`, advancing `count` as it goes.
	void BuildIndex(u16 *indices, int &count) const {
		for (int pu = 0; pu < num_patches_u; ++pu) {
			for (int pv = 0; pv < num_patches_v; ++pv) {
				const int first_vertex = (pv * num_patches_u + pu) * num_verts_per_patch;
				Spline::BuildIndex(indices + count, count, tess_u, tess_v, primType, first_vertex);
			}
		}
	}
};
// Surface description for spline patches. All patches share one vertex grid of
// (num_patches_u * tess_u + 1) by (num_patches_v * tess_v + 1) vertices.
struct SplineSurface : public SurfaceInfo {
	using WeightType = Spline3DWeight;

	int num_vertices_u;

	// Prepares the surface so that the total vertex count does not exceed maxVertices.
	void Init(int maxVertices) {
		SurfaceInfo::Init();
		// Downsample until it fits, in case crazy tessellation factors are sent.
		for (;;) {
			if ((num_patches_u * tess_u + 1) * (num_patches_v * tess_v + 1) <= maxVertices)
				break;
			--tess_u;
			--tess_v;
		}
		num_vertices_u = num_patches_u * tess_u + 1;
	}

	// Only patch 0 starts at tile 0; later patches start at tile 1.
	// NOTE(review): presumably because tile 0 coincides with the previous patch's last tile - confirm.
	int GetTessStart(int patch) const {
		if (patch == 0)
			return 0;
		return 1;
	}

	// Index of the first control point of a patch; neighbouring patches are 1 point apart.
	int GetPointIndex(int patch_u, int patch_v) const {
		return patch_v * num_points_u + patch_u;
	}

	// Coordinates inside the shared vertex grid.
	int GetIndexU(int patch_u, int tile_u) const {
		return patch_u * tess_u + tile_u;
	}

	int GetIndexV(int patch_v, int tile_v) const {
		return patch_v * tess_v + tile_v;
	}

	// Output vertex index inside the shared grid (the patch coordinates are not needed).
	int GetIndex(int index_u, int index_v, int patch_u, int patch_v) const {
		return index_v * num_vertices_u + index_u;
	}

	// Builds the indices for the whole grid in a single call.
	void BuildIndex(u16 *indices, int &count) const {
		const int tiles_u = num_patches_u * tess_u;
		const int tiles_v = num_patches_v * tess_v;
		Spline::BuildIndex(indices, count, tiles_u, tiles_v, primType);
	}
};
// Weight coefficients for one sample position along one axis:
// four basis values and four derivative values (one per control point of a 4-point span).
struct Weight {
float basis[4], deriv[4];
};
// Caches weight tables by key. T supplies CalcWeightsAll(key), which creates the table
// for a key the first time it is requested; the cache owns the tables until Clear().
template<class T>
class WeightCache : public T {
private:
	std::unordered_map<u32, Weight*> weightsCache;
public:
	// Returns the table for `key`, computing and storing it on first use.
	Weight* operator [] (u32 key) {
		Weight *&slot = weightsCache[key];
		if (slot == nullptr)
			slot = T::CalcWeightsAll(key);
		return slot;
	}

	// Frees every cached table (array delete) and empties the cache.
	void Clear() {
		for (auto &entry : weightsCache)
			delete[] entry.second;
		weightsCache.clear();
	}
};
// Weight tables for both axes of a surface, looked up from a WeightCache.
struct Weight2D {
	const Weight *u, *v;
	// Number of Weight entries in u and v. The constructor does not know them, so they
	// start at zero instead of holding indeterminate values.
	// NOTE(review): presumably assigned by the caller after construction - confirm.
	int size_u = 0, size_v = 0;

	// Fetches (or lazily creates) the tables for key_u and key_v from `cache`.
	template<class T>
	Weight2D(WeightCache<T> &cache, u32 key_u, u32 key_v) {
		u = cache[key_u];
		v = (key_u != key_v) ? cache[key_v] : u; // Use same weights if u == v
	}
};
// Control point attributes split into separate position / texcoord / color arrays.
struct ControlPoints {
	// Null until set, so a default-constructed object never holds indeterminate pointers.
	Vec3f *pos = nullptr;
	Vec2f *tex = nullptr;
	Vec4f *col = nullptr;
	u32_le defcolor;

	ControlPoints() {}
	// Defined elsewhere. NOTE(review): presumably obtains the arrays from managedBuf and converts `points` - confirm.
	ControlPoints(const SimpleVertex *const *points, int size, SimpleBufferManager &managedBuf);
	// Defined elsewhere; takes `size` control points to convert.
	void Convert(const SimpleVertex *const *points, int size);
};
// Destination buffers handed to SoftwareTessellation.
struct OutputBuffers {
// Output vertex array.
SimpleVertex *vertices;
// Output index array.
u16 *indices;
// NOTE(review): presumably the number of indices written so far - confirm against SoftwareTessellation.
int count;
};
template<class Surface>
void SoftwareTessellation(OutputBuffers &output, const Surface &surface, u32 origVertType, const ControlPoints &points);
} // namespace Spline
bool CanUseHardwareTessellation(GEPatchPrimType prim);
void TessellateSplinePatch(u8 *&dest, u16 *indices, int &count, const SplinePatchLocal &spatch, u32 origVertType, int maxVertices);
void TessellateBezierPatch(u8 *&dest, u16 *&indices, int &count, int tess_u, int tess_v, const BezierPatch &patch, u32 origVertType);
// Defines a function object type for use as the Dispatcher of TemplateParameterDispatcher.
// NAME is the struct to define, FUNCNAME is a function template taking only bool template
// parameters, and FUNCTYPE is the function pointer type returned by GetFunc().
// NAME::GetFunc<Params...>() returns the address of FUNCNAME<Params...>.
#define TEMPLATE_PARAMETER_DISPATCHER_FUNCTION(NAME, FUNCNAME, FUNCTYPE) \
struct NAME { \
template<bool ...Params> \
static FUNCTYPE GetFunc() { \
return &FUNCNAME<Params...>; \
} \
};
// Selects, at run time, one instantiation of a function template that takes NumParams
// bool template parameters.
//   Func       - type stored in the table (the function pointer type).
//   NumParams  - number of bool template parameters.
//   Dispatcher - type providing a static GetFunc<bool...>() that returns the instantiation
//                for the given parameter values.
// All 2^NumParams instantiations are stored in a table when the object is constructed.
template<typename Func, int NumParams, class Dispatcher>
class TemplateParameterDispatcher {
/* Stores every combination of the template function into an array.
   Each recursion level decides one bool: it is prepended to Params and appended as the
   lowest bit of Index, so Params[k] ends up corresponding to bit k of Index. */
template<int LoopCount, int Index = 0, bool ...Params>
struct Initializer {
static void Init(Func funcs[]) {
Initializer<LoopCount - 1, (Index << 1) + 1, true, Params...>::Init(funcs); // branch with this parameter set to true
Initializer<LoopCount - 1, (Index << 1) + 0, false, Params...>::Init(funcs); // branch with this parameter set to false
}
};
/* Specialization that terminates the recursion: all parameters are decided, so store the function. */
template<int Index, bool ...Params>
struct Initializer<0, Index, Params...> {
static void Init(Func funcs[]) {
funcs[Index] = Dispatcher::template GetFunc<Params...>(); // Resolve the nested dependent name as template function.
}
};
private:
Func funcs[1 << NumParams]; /* Function pointer table, one entry per parameter combination */
public:
/* Fills the table with all instantiations. */
TemplateParameterDispatcher() {
Initializer<NumParams>::Init(funcs);
}
/* Returns the instantiation matching params[0..NumParams-1]; params[i] supplies bit i of the table index. */
Func GetFunc(const bool params[]) const {
/* Convert the bool parameters to an index into the array */
int index = 0;
for (int i = 0; i < NumParams; ++i)
index |= params[i] << i;
return funcs[index];
}
};

View File

@ -102,7 +102,7 @@ public:
: indices(indices), indexType(vertType & GE_VTYPE_IDX_MASK) {
}
inline u32 convert(u32 index) const {
u32 operator() (u32 index) const {
switch (indexType) {
case GE_VTYPE_IDX_8BIT:
return indices8[index];

View File

@ -89,7 +89,6 @@ DrawEngineD3D11::DrawEngineD3D11(Draw::DrawContext *draw, ID3D11Device *device,
// All this is a LOT of memory, need to see if we can cut down somehow.
decoded = (u8 *)AllocateMemoryPages(DECODED_VERTEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
decIndex = (u16 *)AllocateMemoryPages(DECODED_INDEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
splineBuffer = (u8 *)AllocateMemoryPages(SPLINE_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
indexGen.Setup(decIndex);
@ -104,14 +103,14 @@ DrawEngineD3D11::~DrawEngineD3D11() {
DestroyDeviceObjects();
FreeMemoryPages(decoded, DECODED_VERTEX_BUFFER_SIZE);
FreeMemoryPages(decIndex, DECODED_INDEX_BUFFER_SIZE);
FreeMemoryPages(splineBuffer, SPLINE_BUFFER_SIZE);
}
void DrawEngineD3D11::InitDeviceObjects() {
pushVerts_ = new PushBufferD3D11(device_, VERTEX_PUSH_SIZE, D3D11_BIND_VERTEX_BUFFER);
pushInds_ = new PushBufferD3D11(device_, INDEX_PUSH_SIZE, D3D11_BIND_INDEX_BUFFER);
tessDataTransfer = new TessellationDataTransferD3D11(context_, device_);
tessDataTransferD3D11 = new TessellationDataTransferD3D11(context_, device_);
tessDataTransfer = tessDataTransferD3D11;
}
void DrawEngineD3D11::ClearTrackedVertexArrays() {
@ -137,7 +136,7 @@ void DrawEngineD3D11::Resized() {
void DrawEngineD3D11::DestroyDeviceObjects() {
ClearTrackedVertexArrays();
ClearInputLayoutMap();
delete tessDataTransfer;
delete tessDataTransferD3D11;
delete pushVerts_;
delete pushInds_;
depthStencilCache_.Iterate([&](const uint64_t &key, ID3D11DepthStencilState *ds) {
@ -539,10 +538,7 @@ rotateVBO:
memcpy(iptr, decIndex, iSize);
pushInds_->EndPush(context_);
context_->IASetIndexBuffer(pushInds_->Buf(), DXGI_FORMAT_R16_UINT, iOffset);
if (tess)
context_->DrawIndexedInstanced(vertexCount, numPatches, 0, 0, 0);
else
context_->DrawIndexed(vertexCount, 0, 0);
context_->DrawIndexed(vertexCount, 0, 0);
} else {
context_->Draw(vertexCount, 0);
}
@ -551,10 +547,7 @@ rotateVBO:
context_->IASetVertexBuffers(0, 1, &vb_, &stride, &offset);
if (useElements) {
context_->IASetIndexBuffer(ib_, DXGI_FORMAT_R16_UINT, 0);
if (tess)
context_->DrawIndexedInstanced(vertexCount, numPatches, 0, 0, 0);
else
context_->DrawIndexed(vertexCount, 0, 0);
context_->DrawIndexed(vertexCount, 0, 0);
} else {
context_->Draw(vertexCount, 0);
}
@ -692,38 +685,85 @@ rotateVBO:
GPUDebug::NotifyDraw();
}
void DrawEngineD3D11::TessellationDataTransferD3D11::PrepareBuffers(float *&pos, float *&tex, float *&col, int &posStride, int &texStride, int &colStride, int size, bool hasColor, bool hasTexCoords) {
TessellationDataTransferD3D11::TessellationDataTransferD3D11(ID3D11DeviceContext *context, ID3D11Device *device)
: context_(context), device_(device) {
desc.Usage = D3D11_USAGE_DYNAMIC;
desc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
desc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;
}
TessellationDataTransferD3D11::~TessellationDataTransferD3D11() {
for (int i = 0; i < 3; ++i) {
if (buf[i]) buf[i]->Release();
if (view[i]) view[i]->Release();
}
}
void TessellationDataTransferD3D11::SendDataToShader(const SimpleVertex *const *points, int size_u, int size_v, u32 vertType, const Spline::Weight2D &weights) {
struct TessData {
float pos[3]; float pad1;
float uv[2]; float pad2[2];
float color[4];
};
int size = size_u * size_v;
if (prevSize < size) {
prevSize = size;
if (buf) {
buf->Release();
view->Release();
}
if (buf[0]) buf[0]->Release();
if (view[0]) view[0]->Release();
desc.ByteWidth = size * sizeof(TessData);
desc.StructureByteStride = sizeof(TessData);
device_->CreateBuffer(&desc, nullptr, &buf);
device_->CreateShaderResourceView(buf, 0, &view);
context_->VSSetShaderResources(0, 1, &view);
device_->CreateBuffer(&desc, nullptr, &buf[0]);
device_->CreateShaderResourceView(buf[0], nullptr, &view[0]);
context_->VSSetShaderResources(0, 1, &view[0]);
}
D3D11_MAPPED_SUBRESOURCE map;
context_->Map(buf, 0, D3D11_MAP_WRITE_DISCARD, 0, &map);
context_->Map(buf[0], 0, D3D11_MAP_WRITE_DISCARD, 0, &map);
uint8_t *data = (uint8_t *)map.pData;
pos = (float *)(data);
tex = (float *)(data + offsetof(TessData, uv));
col = (float *)(data + offsetof(TessData, color));
posStride = sizeof(TessData) / sizeof(float);
colStride = hasColor ? (sizeof(TessData) / sizeof(float)) : 0;
texStride = sizeof(TessData) / sizeof(float);
}
float *pos = (float *)(data);
float *tex = (float *)(data + offsetof(TessData, uv));
float *col = (float *)(data + offsetof(TessData, color));
int stride = sizeof(TessData) / sizeof(float);
void DrawEngineD3D11::TessellationDataTransferD3D11::SendDataToShader(const float * pos, const float * tex, const float * col, int size, bool hasColor, bool hasTexCoords) {
context_->Unmap(buf, 0);
CopyControlPoints(pos, tex, col, stride, stride, stride, points, size, vertType);
context_->Unmap(buf[0], 0);
using Spline::Weight;
// Weights U
if (prevSizeWU < weights.size_u) {
prevSizeWU = weights.size_u;
if (buf[1]) buf[1]->Release();
if (view[1]) view[1]->Release();
desc.ByteWidth = weights.size_u * sizeof(Weight);
desc.StructureByteStride = sizeof(Weight);
device_->CreateBuffer(&desc, nullptr, &buf[1]);
device_->CreateShaderResourceView(buf[1], nullptr, &view[1]);
context_->VSSetShaderResources(1, 1, &view[1]);
}
context_->Map(buf[1], 0, D3D11_MAP_WRITE_DISCARD, 0, &map);
memcpy(map.pData, weights.u, weights.size_u * sizeof(Weight));
context_->Unmap(buf[1], 0);
// Weights V
if (prevSizeWV < weights.size_v) {
prevSizeWV = weights.size_v;
if (buf[2]) buf[2]->Release();
if (view[2]) view[2]->Release();
desc.ByteWidth = weights.size_v * sizeof(Weight);
desc.StructureByteStride = sizeof(Weight);
device_->CreateBuffer(&desc, nullptr, &buf[2]);
device_->CreateShaderResourceView(buf[2], nullptr, &view[2]);
context_->VSSetShaderResources(2, 1, &view[2]);
}
context_->Map(buf[2], 0, D3D11_MAP_WRITE_DISCARD, 0, &map);
memcpy(map.pData, weights.v, weights.size_v * sizeof(Weight));
context_->Unmap(buf[2], 0);
}

View File

@ -99,6 +99,22 @@ public:
u8 flags;
};
// D3D11 implementation of TessellationDataTransfer.
// Owns up to three structured buffers and their shader resource views:
// [0] control points, [1] weights along u, [2] weights along v.
class TessellationDataTransferD3D11 : public TessellationDataTransfer {
private:
	ID3D11DeviceContext *context_;
	ID3D11Device *device_;
	ID3D11Buffer *buf[3]{};
	ID3D11ShaderResourceView *view[3]{};
	D3D11_BUFFER_DESC desc{};
	// Sizes the buffers were last created with; a buffer is recreated only when a larger size is needed.
	int prevSize = 0;
	int prevSizeWU = 0, prevSizeWV = 0;
public:
	TessellationDataTransferD3D11(ID3D11DeviceContext *context, ID3D11Device *device);
	~TessellationDataTransferD3D11();
	// The destructor releases the buffers and views, so a copy would release them a second time.
	TessellationDataTransferD3D11(const TessellationDataTransferD3D11 &) = delete;
	TessellationDataTransferD3D11 &operator=(const TessellationDataTransferD3D11 &) = delete;
	// Send spline/bezier's control points and weights to vertex shader through structured shader buffer.
	void SendDataToShader(const SimpleVertex *const *points, int size_u, int size_v, u32 vertType, const Spline::Weight2D &weights) override;
};
// Handles transform, lighting and drawing.
class DrawEngineD3D11 : public DrawEngineCommon {
public:
@ -199,29 +215,5 @@ private:
D3D11DynamicState dynState_{};
// Hardware tessellation
class TessellationDataTransferD3D11 : public TessellationDataTransfer {
private:
ID3D11DeviceContext *context_;
ID3D11Device *device_;
ID3D11Buffer *buf;
ID3D11ShaderResourceView *view;
D3D11_BUFFER_DESC desc;
public:
TessellationDataTransferD3D11(ID3D11DeviceContext *context, ID3D11Device *device)
: TessellationDataTransfer(), context_(context), device_(device), buf(), view(), desc() {
desc.Usage = D3D11_USAGE_DYNAMIC;
desc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
desc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;
}
~TessellationDataTransferD3D11() {
if (buf) {
buf->Release();
view->Release();
}
}
void PrepareBuffers(float *&pos, float *&tex, float *&col, int &posStride, int &texStride, int &colStride, int size, bool hasColor, bool hasTexCoords) override;
void SendDataToShader(const float *pos, const float *tex, const float *col, int size, bool hasColor, bool hasTexCoords) override;
};
TessellationDataTransferD3D11 *tessDataTransferD3D11;
};

View File

@ -95,13 +95,13 @@ DrawEngineDX9::DrawEngineDX9(Draw::DrawContext *draw) : vai_(256), vertexDeclMap
// All this is a LOT of memory, need to see if we can cut down somehow.
decoded = (u8 *)AllocateMemoryPages(DECODED_VERTEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
decIndex = (u16 *)AllocateMemoryPages(DECODED_INDEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
splineBuffer = (u8 *)AllocateMemoryPages(SPLINE_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
indexGen.Setup(decIndex);
InitDeviceObjects();
tessDataTransfer = new TessellationDataTransferDX9();
tessDataTransferDX9 = new TessellationDataTransferDX9();
tessDataTransfer = tessDataTransferDX9;
device_->CreateVertexDeclaration(TransformedVertexElements, &transformedVertexDecl_);
}
@ -114,14 +114,13 @@ DrawEngineDX9::~DrawEngineDX9() {
DestroyDeviceObjects();
FreeMemoryPages(decoded, DECODED_VERTEX_BUFFER_SIZE);
FreeMemoryPages(decIndex, DECODED_INDEX_BUFFER_SIZE);
FreeMemoryPages(splineBuffer, SPLINE_BUFFER_SIZE);
vertexDeclMap_.Iterate([&](const uint32_t &key, IDirect3DVertexDeclaration9 *decl) {
if (decl) {
decl->Release();
}
});
vertexDeclMap_.Clear();
delete tessDataTransfer;
delete tessDataTransferDX9;
}
void DrawEngineDX9::InitDeviceObjects() {
@ -624,8 +623,8 @@ rotateVBO:
GPUDebug::NotifyDraw();
}
void DrawEngineDX9::TessellationDataTransferDX9::SendDataToShader(const float * pos, const float * tex, const float * col, int size, bool hasColor, bool hasTexCoords)
{
void TessellationDataTransferDX9::SendDataToShader(const SimpleVertex *const *points, int size_u, int size_v, u32 vertType, const Spline::Weight2D &weights) {
// TODO
}
} // namespace

View File

@ -97,6 +97,13 @@ public:
u8 flags;
};
// DX9 implementation of TessellationDataTransfer. It holds no state of its own.
class TessellationDataTransferDX9 : public TessellationDataTransfer {
public:
	TessellationDataTransferDX9() = default;
	~TessellationDataTransferDX9() = default;

	// Control point / weight upload for the DX9 backend (defined in the .cpp file).
	void SendDataToShader(const SimpleVertex *const *points, int size_u, int size_v, u32 vertType, const Spline::Weight2D &weights) override;
};
// Handles transform, lighting and drawing.
class DrawEngineDX9 : public DrawEngineCommon {
public:
@ -158,16 +165,7 @@ private:
FramebufferManagerDX9 *framebufferManager_ = nullptr;
// Hardware tessellation
class TessellationDataTransferDX9 : public TessellationDataTransfer {
private:
int data_tex[3];
public:
TessellationDataTransferDX9() : TessellationDataTransfer(), data_tex() {
}
~TessellationDataTransferDX9() {
}
void SendDataToShader(const float *pos, const float *tex, const float *col, int size, bool hasColor, bool hasTexCoords) override;
};
TessellationDataTransferDX9 *tessDataTransferDX9;
};
} // namespace

View File

@ -86,6 +86,7 @@ void GenerateVertexShaderHLSL(const VShaderID &id, char *buffer, ShaderLanguage
bool doSpline = id.Bit(VS_BIT_SPLINE);
bool hasColorTess = id.Bit(VS_BIT_HAS_COLOR_TESS);
bool hasTexcoordTess = id.Bit(VS_BIT_HAS_TEXCOORD_TESS);
bool hasNormalTess = id.Bit(VS_BIT_HAS_NORMAL_TESS);
bool flipNormalTess = id.Bit(VS_BIT_NORM_REVERSE_TESS);
DoLightComputation doLight[4] = { LIGHT_OFF, LIGHT_OFF, LIGHT_OFF, LIGHT_OFF };
@ -271,82 +272,90 @@ void GenerateVertexShaderHLSL(const VShaderID &id, char *buffer, ShaderLanguage
WRITE(p, " float3 pos; float pad1;\n");
WRITE(p, " float2 tex; float2 pad2;\n");
WRITE(p, " float4 col;\n");
WRITE(p, "};");
WRITE(p, "};\n");
WRITE(p, "StructuredBuffer<TessData> tess_data : register(t0);\n");
WRITE(p, "struct TessWeight {\n");
WRITE(p, " float4 basis;\n");
WRITE(p, " float4 deriv;\n");
WRITE(p, "};\n");
WRITE(p, "StructuredBuffer<TessWeight> tess_weights_u : register(t1);\n");
WRITE(p, "StructuredBuffer<TessWeight> tess_weights_v : register(t2);\n");
}
const char *init[3] = { "0.0, 0.0", "0.0, 0.0, 0.0", "0.0, 0.0, 0.0, 0.0" };
for (int i = 2; i <= 4; i++) {
// Define 3 types float2, float3, float4
WRITE(p, "float%d tess_sample(in float%d points[16], in float2 weights[4]) {\n", i, i);
WRITE(p, "float%d tess_sample(in float%d points[16], float4x4 weights) {\n", i, i);
WRITE(p, " float%d pos = float%d(%s);\n", i, i, init[i - 2]);
WRITE(p, " for (int i = 0; i < 4; ++i) {\n");
WRITE(p, " for (int j = 0; j < 4; ++j) {\n");
WRITE(p, " float f = weights[j].x * weights[i].y;\n");
WRITE(p, " if (f != 0.0)\n");
WRITE(p, " pos = pos + f * points[i * 4 + j];\n");
WRITE(p, " }\n");
WRITE(p, " }\n");
for (int v = 0; v < 4; ++v) {
for (int u = 0; u < 4; ++u) {
WRITE(p, " pos += weights[%i][%i] * points[%i];\n", v, u, v * 4 + u);
}
}
WRITE(p, " return pos;\n");
WRITE(p, "}\n");
}
if (doSpline) {
WRITE(p, "void spline_knot(int2 num_patches, int2 type, out float2 knot[6], int2 patch_pos) {\n");
WRITE(p, " for (int i = 0; i < 6; ++i) {\n");
WRITE(p, " knot[i] = float2(i + patch_pos.x - 2, i + patch_pos.y - 2);\n");
WRITE(p, " }\n");
// WRITE(p, " if ((type.x & 1) != 0) {\n");
WRITE(p, " if ((type.x == 1) || (type.x == 3)) {\n");
WRITE(p, " if (patch_pos.x <= 2)\n");
WRITE(p, " knot[0].x = 0.0;\n");
WRITE(p, " if (patch_pos.x <= 1)\n");
WRITE(p, " knot[1].x = 0.0;\n");
WRITE(p, " }\n");
// WRITE(p, " if ((type.x & 2) != 0) {\n");
WRITE(p, " if ((type.x == 2) || (type.x == 3)) {\n");
WRITE(p, " if (patch_pos.x >= (num_patches.x - 2))\n");
WRITE(p, " knot[5].x = num_patches.x;\n");
WRITE(p, " if (patch_pos.x == (num_patches.x - 1))\n");
WRITE(p, " knot[4].x = num_patches.x;\n");
WRITE(p, " }\n");
// WRITE(p, " if ((type.y & 1) != 0) {\n");
WRITE(p, " if ((type.y == 1) || (type.y == 3)) {\n");
WRITE(p, " if (patch_pos.y <= 2)\n");
WRITE(p, " knot[0].y = 0.0;\n");
WRITE(p, " if (patch_pos.y <= 1)\n");
WRITE(p, " knot[1].y = 0.0;\n");
WRITE(p, " }\n");
// WRITE(p, " if ((type.y & 2) != 0) {\n");
WRITE(p, " if ((type.y == 2) || (type.y == 3)) {\n");
WRITE(p, " if (patch_pos.y >= (num_patches.y - 2))\n");
WRITE(p, " knot[5].y = num_patches.y;\n");
WRITE(p, " if (patch_pos.y == (num_patches.y - 1))\n");
WRITE(p, " knot[4].y = num_patches.y;\n");
WRITE(p, " }\n");
WRITE(p, "}\n");
WRITE(p, "void spline_weight(float2 t, in float2 knot[6], out float2 weights[4]) {\n");
// TODO: Maybe compilers could be coaxed into vectorizing this code without the above explicitly...
WRITE(p, " float2 t0 = (t - knot[0]);\n");
WRITE(p, " float2 t1 = (t - knot[1]);\n");
WRITE(p, " float2 t2 = (t - knot[2]);\n");
// TODO: All our knots are integers so we should be able to get rid of these divisions (How?)
WRITE(p, " float2 f30 = t0 / (knot[3] - knot[0]);\n");
WRITE(p, " float2 f41 = t1 / (knot[4] - knot[1]);\n");
WRITE(p, " float2 f52 = t2 / (knot[5] - knot[2]);\n");
WRITE(p, " float2 f31 = t1 / (knot[3] - knot[1]);\n");
WRITE(p, " float2 f42 = t2 / (knot[4] - knot[2]);\n");
WRITE(p, " float2 f32 = t2 / (knot[3] - knot[2]);\n");
WRITE(p, " float2 a = (1.0 - f30)*(1.0 - f31);\n");
WRITE(p, " float2 b = (f31*f41);\n");
WRITE(p, " float2 c = (1.0 - f41)*(1.0 - f42);\n");
WRITE(p, " float2 d = (f42*f52);\n");
WRITE(p, " weights[0] = a - (a*f32);\n");
WRITE(p, " weights[1] = 1.0 - a - b + ((a + b + c - 1.0)*f32);\n");
WRITE(p, " weights[2] = b + ((1.0 - b - c - d)*f32);\n");
WRITE(p, " weights[3] = d*f32;\n");
WRITE(p, "}\n");
WRITE(p, "float4x4 outerProduct(float4 u, float4 v) {\n");
WRITE(p, " return mul((float4x1)v, (float1x4)u);\n");
WRITE(p, "}\n");
WRITE(p, "struct Tess {\n");
WRITE(p, " float3 pos;\n");
if (doTexture)
WRITE(p, " float2 tex;\n");
WRITE(p, " float4 col;\n");
if (hasNormalTess)
WRITE(p, " float3 nrm;\n");
WRITE(p, "};\n");
WRITE(p, "void tessellate(in VS_IN In, out Tess tess) {\n");
WRITE(p, " int2 point_pos = int2(In.position.z, In.normal.z)%s;\n", doBezier ? " * 3" : "");
WRITE(p, " int2 weight_idx = int2(In.position.xy);\n");
// Load 4x4 control points
WRITE(p, " float3 _pos[16];\n");
WRITE(p, " float2 _tex[16];\n");
WRITE(p, " float4 _col[16];\n");
WRITE(p, " int index;\n");
for (int i = 0; i < 4; i++) {
for (int j = 0; j < 4; j++) {
WRITE(p, " index = (%i + point_pos.y) * u_spline_counts + (%i + point_pos.x);\n", i, j);
WRITE(p, " _pos[%i] = tess_data[index].pos;\n", i * 4 + j);
if (doTexture && hasTexcoordTess)
WRITE(p, " _tex[%i] = tess_data[index].tex;\n", i * 4 + j);
if (hasColorTess)
WRITE(p, " _col[%i] = tess_data[index].col;\n", i * 4 + j);
}
}
// Basis polynomials as weight coefficients
WRITE(p, " float4 basis_u = tess_weights_u[weight_idx.x].basis;\n");
WRITE(p, " float4 basis_v = tess_weights_v[weight_idx.y].basis;\n");
WRITE(p, " float4x4 basis = outerProduct(basis_u, basis_v);\n");
// Tessellate
WRITE(p, " tess.pos = tess_sample(_pos, basis);\n");
if (doTexture) {
if (hasTexcoordTess)
WRITE(p, " tess.tex = tess_sample(_tex, basis);\n");
else
WRITE(p, " tess.tex = In.normal.xy;\n");
}
if (hasColorTess)
WRITE(p, " tess.col = tess_sample(_col, basis);\n");
else
WRITE(p, " tess.col = u_matambientalpha;\n");
if (hasNormalTess) {
// Derivatives as weight coefficients
WRITE(p, " float4 deriv_u = tess_weights_u[weight_idx.x].deriv;\n");
WRITE(p, " float4 deriv_v = tess_weights_v[weight_idx.y].deriv;\n");
WRITE(p, " float3 du = tess_sample(_pos, outerProduct(deriv_u, basis_v));\n");
WRITE(p, " float3 dv = tess_sample(_pos, outerProduct(basis_u, deriv_v));\n");
WRITE(p, " tess.nrm = normalize(cross(du, dv));\n");
}
WRITE(p, "}\n");
}
WRITE(p, "VS_OUT main(VS_IN In) {\n");
@ -396,106 +405,14 @@ void GenerateVertexShaderHLSL(const VShaderID &id, char *buffer, ShaderLanguage
} else {
// Step 1: World Transform / Skinning
if (!enableBones) {
// Hardware tessellation
if (doSpline || doBezier) {
WRITE(p, " uint u_spline_count_u = u_spline_counts & 0xFF;\n");
WRITE(p, " uint u_spline_count_v = (u_spline_counts >> 8) & 0xFF;\n");
WRITE(p, " uint num_patches_u = %s;\n", doBezier ? "(u_spline_count_u - 1) / 3u" : "u_spline_count_u - 3");
WRITE(p, " float2 tess_pos = In.position.xy;\n");
WRITE(p, " int u = In.instanceId %% num_patches_u;\n");
WRITE(p, " int v = In.instanceId / num_patches_u;\n");
WRITE(p, " int2 patch_pos = int2(u, v);\n");
WRITE(p, " float3 _pos[16];\n");
WRITE(p, " float2 _tex[16];\n");
WRITE(p, " float4 _col[16];\n");
WRITE(p, " int index;\n");
for (int i = 0; i < 4; i++) {
for (int j = 0; j < 4; j++) {
WRITE(p, " index = (%i + v%s) * u_spline_count_u + (%i + u%s);\n", i, doBezier ? " * 3" : "", j, doBezier ? " * 3" : "");
WRITE(p, " _pos[%i] = tess_data[index].pos;\n", i * 4 + j);
if (doTexture && hasTexcoord && hasTexcoordTess)
WRITE(p, " _tex[%i] = tess_data[index].tex;\n", i * 4 + j);
if (hasColor && hasColorTess)
WRITE(p, " _col[%i] = tess_data[index].col;\n", i * 4 + j);
}
}
WRITE(p, " float2 weights[4];\n");
if (doBezier) {
// Bernstein 3D
WRITE(p, " weights[0] = (1.0 - tess_pos) * (1.0 - tess_pos) * (1.0 - tess_pos);\n");
WRITE(p, " weights[1] = 3.0 * tess_pos * (1.0 - tess_pos) * (1.0 - tess_pos);\n");
WRITE(p, " weights[2] = 3.0 * tess_pos * tess_pos * (1.0 - tess_pos);\n");
WRITE(p, " weights[3] = tess_pos * tess_pos * tess_pos;\n");
} else if (doSpline) {
WRITE(p, " int2 spline_num_patches = int2(u_spline_count_u - 3, u_spline_count_v - 3);\n");
WRITE(p, " int u_spline_type_u = (u_spline_counts >> 16) & 0xFF;\n");
WRITE(p, " int u_spline_type_v = (u_spline_counts >> 24) & 0xFF;\n");
WRITE(p, " int2 spline_type = int2(u_spline_type_u, u_spline_type_v);\n");
WRITE(p, " float2 knots[6];\n");
WRITE(p, " spline_knot(spline_num_patches, spline_type, knots, patch_pos);\n");
WRITE(p, " spline_weight(tess_pos + patch_pos, knots, weights);\n");
}
WRITE(p, " float3 pos = tess_sample(_pos, weights);\n");
if (doTexture && hasTexcoord) {
if (hasTexcoordTess)
WRITE(p, " float2 tex = tess_sample(_tex, weights);\n");
else
WRITE(p, " float2 tex = tess_pos + patch_pos;\n");
}
if (hasColor) {
if (hasColorTess)
WRITE(p, " float4 col = tess_sample(_col, weights);\n");
else
WRITE(p, " float4 col = tess_data[0].col;\n");
}
if (hasNormal) {
// Curved surface is probably always need to compute normal(not sampling from control points)
if (doBezier) {
// Bernstein derivative
WRITE(p, " float2 bernderiv[4];\n");
WRITE(p, " bernderiv[0] = -3.0 * (tess_pos - 1.0) * (tess_pos - 1.0); \n");
WRITE(p, " bernderiv[1] = 9.0 * tess_pos * tess_pos - 12.0 * tess_pos + 3.0; \n");
WRITE(p, " bernderiv[2] = 3.0 * (2.0 - 3.0 * tess_pos) * tess_pos; \n");
WRITE(p, " bernderiv[3] = 3.0 * tess_pos * tess_pos; \n");
// Hardware tessellation
WRITE(p, " Tess tess;\n");
WRITE(p, " tessellate(In, tess);\n");
WRITE(p, " float2 bernderiv_u[4];\n");
WRITE(p, " float2 bernderiv_v[4];\n");
WRITE(p, " for (int i = 0; i < 4; i++) {\n");
WRITE(p, " bernderiv_u[i] = float2(bernderiv[i].x, weights[i].y);\n");
WRITE(p, " bernderiv_v[i] = float2(weights[i].x, bernderiv[i].y);\n");
WRITE(p, " }\n");
WRITE(p, " float3 du = tess_sample(_pos, bernderiv_u);\n");
WRITE(p, " float3 dv = tess_sample(_pos, bernderiv_v);\n");
} else if (doSpline) {
WRITE(p, " float2 tess_next_u = float2(In.normal.x, 0.0);\n");
WRITE(p, " float2 tess_next_v = float2(0.0, In.normal.y);\n");
// Right
WRITE(p, " float2 tess_pos_r = tess_pos + tess_next_u;\n");
WRITE(p, " spline_weight(tess_pos_r + patch_pos, knots, weights);\n");
WRITE(p, " float3 pos_r = tess_sample(_pos, weights);\n");
// Left
WRITE(p, " float2 tess_pos_l = tess_pos - tess_next_u;\n");
WRITE(p, " spline_weight(tess_pos_l + patch_pos, knots, weights);\n");
WRITE(p, " float3 pos_l = tess_sample(_pos, weights);\n");
// Down
WRITE(p, " float2 tess_pos_d = tess_pos + tess_next_v;\n");
WRITE(p, " spline_weight(tess_pos_d + patch_pos, knots, weights);\n");
WRITE(p, " float3 pos_d = tess_sample(_pos, weights);\n");
// Up
WRITE(p, " float2 tess_pos_u = tess_pos - tess_next_v;\n");
WRITE(p, " spline_weight(tess_pos_u + patch_pos, knots, weights);\n");
WRITE(p, " float3 pos_u = tess_sample(_pos, weights);\n");
WRITE(p, " float3 du = pos_r - pos_l;\n");
WRITE(p, " float3 dv = pos_d - pos_u;\n");
}
WRITE(p, " float3 nrm = cross(du, dv);\n");
WRITE(p, " nrm = normalize(nrm);\n");
}
WRITE(p, " float3 worldpos = mul(float4(pos.xyz, 1.0), u_world);\n");
if (hasNormal)
WRITE(p, " float3 worldnormal = normalize(mul(float4(%snrm, 0.0), u_world));\n", flipNormalTess ? "-" : "");
WRITE(p, " float3 worldpos = mul(float4(tess.pos.xyz, 1.0), u_world);\n");
if (hasNormalTess)
WRITE(p, " float3 worldnormal = normalize(mul(float4(%stess.nrm, 0.0), u_world));\n", flipNormalTess ? "-" : "");
else
WRITE(p, " float3 worldnormal = float3(0.0, 0.0, 1.0);\n");
} else {
@ -600,9 +517,10 @@ void GenerateVertexShaderHLSL(const VShaderID &id, char *buffer, ShaderLanguage
const char *diffuseStr = (matUpdate & 2) && hasColor ? "In.color0.rgb" : "u_matdiffuse";
const char *specularStr = (matUpdate & 4) && hasColor ? "In.color0.rgb" : "u_matspecular.rgb";
if (doBezier || doSpline) {
ambientStr = (matUpdate & 1) && hasColor ? "col" : "u_matambientalpha";
diffuseStr = (matUpdate & 2) && hasColor ? "col.rgb" : "u_matdiffuse";
specularStr = (matUpdate & 4) && hasColor ? "col.rgb" : "u_matspecular.rgb";
// TODO: Probably, should use hasColorTess but FF4 has a problem with drawing the background.
ambientStr = (matUpdate & 1) && hasColor ? "tess.col" : "u_matambientalpha";
diffuseStr = (matUpdate & 2) && hasColor ? "tess.col.rgb" : "u_matdiffuse";
specularStr = (matUpdate & 4) && hasColor ? "tess.col.rgb" : "u_matspecular.rgb";
}
bool diffuseIsZero = true;
@ -729,7 +647,7 @@ void GenerateVertexShaderHLSL(const VShaderID &id, char *buffer, ShaderLanguage
// Lighting doesn't affect color.
if (hasColor) {
if (doBezier || doSpline)
WRITE(p, " Out.v_color0 = col;\n");
WRITE(p, " Out.v_color0 = tess.col;\n");
else
WRITE(p, " Out.v_color0 = In.color0;\n");
} else {
@ -747,7 +665,7 @@ void GenerateVertexShaderHLSL(const VShaderID &id, char *buffer, ShaderLanguage
if (scaleUV) {
if (hasTexcoord) {
if (doBezier || doSpline)
WRITE(p, " Out.v_texcoord = float3(tex.xy * u_uvscaleoffset.xy + u_uvscaleoffset.zw, 0.0);\n");
WRITE(p, " Out.v_texcoord = float3(tess.tex.xy * u_uvscaleoffset.xy + u_uvscaleoffset.zw, 0.0);\n");
else
WRITE(p, " Out.v_texcoord = float3(In.texcoord.xy * u_uvscaleoffset.xy, 0.0);\n");
} else {
@ -755,10 +673,7 @@ void GenerateVertexShaderHLSL(const VShaderID &id, char *buffer, ShaderLanguage
}
} else {
if (hasTexcoord) {
if (doBezier || doSpline)
WRITE(p, " Out.v_texcoord = float3(tex.xy * u_uvscaleoffset.xy + u_uvscaleoffset.zw, 0.0);\n");
else
WRITE(p, " Out.v_texcoord = float3(In.texcoord.xy * u_uvscaleoffset.xy + u_uvscaleoffset.zw, 0.0);\n");
WRITE(p, " Out.v_texcoord = float3(In.texcoord.xy * u_uvscaleoffset.xy + u_uvscaleoffset.zw, 0.0);\n");
} else {
WRITE(p, " Out.v_texcoord = float3(u_uvscaleoffset.zw, 0.0);\n");
}

View File

@ -81,22 +81,21 @@ DrawEngineGLES::DrawEngineGLES(Draw::DrawContext *draw) : vai_(256), draw_(draw)
// All this is a LOT of memory, need to see if we can cut down somehow.
decoded = (u8 *)AllocateMemoryPages(DECODED_VERTEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
decIndex = (u16 *)AllocateMemoryPages(DECODED_INDEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
splineBuffer = (u8 *)AllocateMemoryPages(SPLINE_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
indexGen.Setup(decIndex);
InitDeviceObjects();
tessDataTransfer = new TessellationDataTransferGLES(render_);
tessDataTransferGLES = new TessellationDataTransferGLES(render_);
tessDataTransfer = tessDataTransferGLES;
}
DrawEngineGLES::~DrawEngineGLES() {
DestroyDeviceObjects();
FreeMemoryPages(decoded, DECODED_VERTEX_BUFFER_SIZE);
FreeMemoryPages(decIndex, DECODED_INDEX_BUFFER_SIZE);
FreeMemoryPages(splineBuffer, SPLINE_BUFFER_SIZE);
delete tessDataTransfer;
delete tessDataTransferGLES;
}
void DrawEngineGLES::DeviceLost() {
@ -166,7 +165,7 @@ void DrawEngineGLES::EndFrame() {
FrameData &frameData = frameData_[render_->GetCurFrame()];
render_->EndPushBuffer(frameData.pushIndex);
render_->EndPushBuffer(frameData.pushVertex);
tessDataTransfer->EndFrame();
tessDataTransferGLES->EndFrame();
}
struct GlTypeInfo {
@ -520,10 +519,7 @@ rotateVBO:
indexBufferOffset = (uint32_t)frameData.pushIndex->Push(decIndex, sizeof(uint16_t) * indexGen.VertexCount(), &indexBuffer);
render_->BindIndexBuffer(indexBuffer);
}
if (gstate_c.bezier || gstate_c.spline)
render_->DrawIndexed(glprim[prim], vertexCount, GL_UNSIGNED_SHORT, (GLvoid*)(intptr_t)indexBufferOffset, numPatches);
else
render_->DrawIndexed(glprim[prim], vertexCount, GL_UNSIGNED_SHORT, (GLvoid*)(intptr_t)indexBufferOffset);
render_->DrawIndexed(glprim[prim], vertexCount, GL_UNSIGNED_SHORT, (GLvoid*)(intptr_t)indexBufferOffset);
} else {
render_->Draw(glprim[prim], 0, vertexCount);
}
@ -655,46 +651,66 @@ bool DrawEngineGLES::IsCodePtrVertexDecoder(const u8 *ptr) const {
return decJitCache_->IsInSpace(ptr);
}
void DrawEngineGLES::TessellationDataTransferGLES::SendDataToShader(const float *pos, const float *tex, const float *col, int size, bool hasColor, bool hasTexCoords) {
void TessellationDataTransferGLES::SendDataToShader(const SimpleVertex *const *points, int size_u, int size_v, u32 vertType, const Spline::Weight2D &weights) {
bool hasColor = (vertType & GE_VTYPE_COL_MASK) != 0;
bool hasTexCoord = (vertType & GE_VTYPE_TC_MASK) != 0;
int size = size_u * size_v;
float *pos = new float[size * 4];
float *tex = hasTexCoord ? new float[size * 4] : nullptr;
float *col = hasColor ? new float[size * 4] : nullptr;
int stride = 4;
CopyControlPoints(pos, tex, col, stride, stride, stride, points, size, vertType);
// Removed the 1D texture support, it's unlikely to be relevant for performance.
if (data_tex[0])
renderManager_->DeleteTexture(data_tex[0]);
uint8_t *pos_data = new uint8_t[size * sizeof(float) * 4];
memcpy(pos_data, pos, size * sizeof(float) * 4);
data_tex[0] = renderManager_->CreateTexture(GL_TEXTURE_2D);
renderManager_->TextureImage(data_tex[0], 0, size, 1, GL_RGBA32F, GL_RGBA, GL_FLOAT, pos_data, GLRAllocType::NEW, false);
renderManager_->FinalizeTexture(data_tex[0], 0, false);
renderManager_->BindTexture(TEX_SLOT_SPLINE_POS, data_tex[0]);
// Texcoords
if (hasTexCoords) {
if (data_tex[1])
renderManager_->DeleteTexture(data_tex[1]);
uint8_t *tex_data = new uint8_t[size * sizeof(float) * 4];
memcpy(tex_data, tex, size * sizeof(float) * 4);
data_tex[1] = renderManager_->CreateTexture(GL_TEXTURE_2D);
renderManager_->TextureImage(data_tex[1], 0, size, 1, GL_RGBA32F, GL_RGBA, GL_FLOAT, tex_data, GLRAllocType::NEW, false);
renderManager_->FinalizeTexture(data_tex[1], 0, false);
renderManager_->BindTexture(TEX_SLOT_SPLINE_NRM, data_tex[1]);
// Control Points
if (prevSizeU < size_u || prevSizeV < size_v) {
prevSizeU = size_u;
prevSizeV = size_v;
if (!data_tex[0])
data_tex[0] = renderManager_->CreateTexture(GL_TEXTURE_2D);
renderManager_->TextureImage(data_tex[0], 0, size_u * 3, size_v, GL_RGBA32F, GL_RGBA, GL_FLOAT, nullptr, GLRAllocType::NONE, false);
renderManager_->FinalizeTexture(data_tex[0], 0, false);
}
renderManager_->BindTexture(TEX_SLOT_SPLINE_POINTS, data_tex[0]);
// Position
renderManager_->TextureSubImage(data_tex[0], 0, 0, 0, size_u, size_v, GL_RGBA, GL_FLOAT, (u8 *)pos, GLRAllocType::NEW);
// Texcoord
if (hasTexCoord)
renderManager_->TextureSubImage(data_tex[0], 0, size_u, 0, size_u, size_v, GL_RGBA, GL_FLOAT, (u8 *)tex, GLRAllocType::NEW);
// Color
if (hasColor)
renderManager_->TextureSubImage(data_tex[0], 0, size_u * 2, 0, size_u, size_v, GL_RGBA, GL_FLOAT, (u8 *)col, GLRAllocType::NEW);
if (data_tex[2])
renderManager_->DeleteTexture(data_tex[2]);
data_tex[2] = renderManager_->CreateTexture(GL_TEXTURE_2D);
int sizeColor = hasColor ? size : 1;
uint8_t *col_data = new uint8_t[sizeColor * sizeof(float) * 4];
memcpy(col_data, col, sizeColor * sizeof(float) * 4);
// Weight U
if (prevSizeWU < weights.size_u) {
prevSizeWU = weights.size_u;
if (!data_tex[1])
data_tex[1] = renderManager_->CreateTexture(GL_TEXTURE_2D);
renderManager_->TextureImage(data_tex[1], 0, weights.size_u * 2, 1, GL_RGBA32F, GL_RGBA, GL_FLOAT, nullptr, GLRAllocType::NONE, false);
renderManager_->FinalizeTexture(data_tex[1], 0, false);
}
renderManager_->BindTexture(TEX_SLOT_SPLINE_WEIGHTS_U, data_tex[1]);
renderManager_->TextureSubImage(data_tex[1], 0, 0, 0, weights.size_u * 2, 1, GL_RGBA, GL_FLOAT, (u8 *)weights.u, GLRAllocType::NONE);
renderManager_->TextureImage(data_tex[2], 0, sizeColor, 1, GL_RGBA32F, GL_RGBA, GL_FLOAT, col_data, GLRAllocType::NEW, false);
renderManager_->FinalizeTexture(data_tex[2], 0, false);
renderManager_->BindTexture(TEX_SLOT_SPLINE_COL, data_tex[2]);
// Weight V
if (prevSizeWV < weights.size_v) {
prevSizeWV = weights.size_v;
if (!data_tex[2])
data_tex[2] = renderManager_->CreateTexture(GL_TEXTURE_2D);
renderManager_->TextureImage(data_tex[2], 0, weights.size_v * 2, 1, GL_RGBA32F, GL_RGBA, GL_FLOAT, nullptr, GLRAllocType::NONE, false);
renderManager_->FinalizeTexture(data_tex[2], 0, false);
}
renderManager_->BindTexture(TEX_SLOT_SPLINE_WEIGHTS_V, data_tex[2]);
renderManager_->TextureSubImage(data_tex[2], 0, 0, 0, weights.size_v * 2, 1, GL_RGBA, GL_FLOAT, (u8 *)weights.v, GLRAllocType::NONE);
}
void DrawEngineGLES::TessellationDataTransferGLES::EndFrame() {
// Releases every data texture that currently exists and resets the remembered
// texture sizes to zero.
void TessellationDataTransferGLES::EndFrame() {
	for (auto &tex : data_tex) {
		if (!tex)
			continue;
		// Hand the texture to the render manager for deletion and forget the pointer.
		renderManager_->DeleteTexture(tex);
		tex = nullptr;
	}
	// No textures remain, so the recorded control point / weight sizes start over.
	prevSizeU = 0;
	prevSizeV = 0;
	prevSizeWU = 0;
	prevSizeWV = 0;
}

View File

@ -46,9 +46,9 @@ enum {
TEX_SLOT_SHADERBLEND_SRC = 1,
TEX_SLOT_ALPHATEST = 2,
TEX_SLOT_CLUT = 3,
TEX_SLOT_SPLINE_POS = 4,
TEX_SLOT_SPLINE_NRM = 5,
TEX_SLOT_SPLINE_COL = 6,
TEX_SLOT_SPLINE_POINTS = 4,
TEX_SLOT_SPLINE_WEIGHTS_U = 5,
TEX_SLOT_SPLINE_WEIGHTS_V = 6,
};
@ -110,6 +110,23 @@ public:
u8 flags;
};
// OpenGL implementation of TessellationDataTransfer: passes spline/bezier control
// points and weights to the vertex shader through floating point textures.
class TessellationDataTransferGLES : public TessellationDataTransfer {
private:
	// [0] = control points, [1] = weights along U, [2] = weights along V.
	// Entries are null until first use and are released again by EndFrame().
	GLRTexture *data_tex[3]{};
	// Control point counts (U/V) recorded when data_tex[0] was last (re)allocated.
	int prevSizeU = 0, prevSizeV = 0;
	// Weight counts (U/V) recorded when data_tex[1] / data_tex[2] were last (re)allocated.
	int prevSizeWU = 0, prevSizeWV = 0;
	// Used to create, fill, bind and delete the textures above.
	GLRenderManager *renderManager_;
public:
	// explicit: a GLRenderManager pointer should never silently convert into a transfer object.
	explicit TessellationDataTransferGLES(GLRenderManager *renderManager)
		: renderManager_(renderManager) { }
	~TessellationDataTransferGLES() {
		EndFrame();
	}
	// The destructor releases the textures in data_tex, so a copy would release
	// the same textures a second time. Copying is therefore disabled.
	TessellationDataTransferGLES(const TessellationDataTransferGLES &) = delete;
	TessellationDataTransferGLES &operator=(const TessellationDataTransferGLES &) = delete;
	// Send spline/bezier's control points and weights to vertex shader through floating point texture.
	void SendDataToShader(const SimpleVertex *const *points, int size_u, int size_v, u32 vertType, const Spline::Weight2D &weights) override;
	void EndFrame(); // Queues textures for deletion.
};
// Handles transform, lighting and drawing.
class DrawEngineGLES : public DrawEngineCommon {
public:
@ -208,17 +225,5 @@ private:
int bufferDecimationCounter_ = 0;
// Hardware tessellation
// Tessellation data transfer declaration whose SendDataToShader takes separate
// position / texcoord / color float arrays.
class TessellationDataTransferGLES : public TessellationDataTransfer {
private:
// One texture each for positions, texcoords and colors; null until created.
GLRTexture *data_tex[3]{};
// Render manager that the textures are created and deleted through.
GLRenderManager *renderManager_;
public:
TessellationDataTransferGLES(GLRenderManager *renderManager)
: renderManager_(renderManager) { }
// Releases any remaining textures via EndFrame().
~TessellationDataTransferGLES() {
EndFrame();
}
// pos/tex/col are float arrays holding `size` entries; hasColor / hasTexCoords tell
// whether color and texcoord data are present.
void SendDataToShader(const float *pos, const float *tex, const float *col, int size, bool hasColor, bool hasTexCoords) override;
void EndFrame() override; // Queues textures for deletion.
};
TessellationDataTransferGLES *tessDataTransferGLES;
};

View File

@ -109,8 +109,7 @@ GPU_GLES::GPU_GLES(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
if (g_Config.bHardwareTessellation) {
// Disable hardware tessellation if device is unsupported.
bool hasTexelFetch = gl_extensions.GLES3 || (!gl_extensions.IsGLES && gl_extensions.VersionGEThan(3, 3, 0)) || gl_extensions.EXT_gpu_shader4;
if (!gstate_c.SupportsAll(GPU_SUPPORTS_INSTANCE_RENDERING | GPU_SUPPORTS_VERTEX_TEXTURE_FETCH | GPU_SUPPORTS_TEXTURE_FLOAT) || !hasTexelFetch) {
// TODO: Check unsupported device name list.(Above gpu features are supported but it has issues with weak gpu, memory, shader compiler etc...)
if (!gstate_c.SupportsAll(GPU_SUPPORTS_VERTEX_TEXTURE_FETCH | GPU_SUPPORTS_TEXTURE_FLOAT) || !hasTexelFetch) {
g_Config.bHardwareTessellation = false;
ERROR_LOG(G3D, "Hardware Tessellation is unsupported, falling back to software tessellation");
I18NCategory *gr = GetI18NCategory("Graphics");

View File

@ -159,13 +159,10 @@ LinkedShader::LinkedShader(GLRenderManager *render, VShaderID VSID, Shader *vs,
// We need to fetch these unconditionally, gstate_c.spline or bezier will not be set if we
// create this shader at load time from the shader cache.
queries.push_back({ &u_tess_pos_tex, "u_tess_pos_tex" });
queries.push_back({ &u_tess_tex_tex, "u_tess_tex_tex" });
queries.push_back({ &u_tess_col_tex, "u_tess_col_tex" });
queries.push_back({ &u_spline_count_u, "u_spline_count_u" });
queries.push_back({ &u_spline_count_v, "u_spline_count_v" });
queries.push_back({ &u_spline_type_u, "u_spline_type_u" });
queries.push_back({ &u_spline_type_v, "u_spline_type_v" });
queries.push_back({ &u_tess_points, "u_tess_points" });
queries.push_back({ &u_tess_weights_u, "u_tess_weights_u" });
queries.push_back({ &u_tess_weights_v, "u_tess_weights_v" });
queries.push_back({ &u_spline_counts, "u_spline_counts" });
queries.push_back({ &u_depal, "u_depal" });
attrMask = vs->GetAttrMask();
@ -176,9 +173,9 @@ LinkedShader::LinkedShader(GLRenderManager *render, VShaderID VSID, Shader *vs,
initialize.push_back({ &u_fbotex, 0, 1 });
initialize.push_back({ &u_testtex, 0, 2 });
initialize.push_back({ &u_pal, 0, 3 }); // CLUT
initialize.push_back({ &u_tess_pos_tex, 0, 4 }); // Texture unit 4
initialize.push_back({ &u_tess_tex_tex, 0, 5 }); // Texture unit 5
initialize.push_back({ &u_tess_col_tex, 0, 6 }); // Texture unit 6
initialize.push_back({ &u_tess_points, 0, 4 }); // Control Points
initialize.push_back({ &u_tess_weights_u, 0, 5 });
initialize.push_back({ &u_tess_weights_v, 0, 6 });
program = render->CreateProgram(shaders, semantics, queries, initialize, gstate_c.featureFlags & GPU_SUPPORTS_DUALSOURCE_BLEND);
@ -567,13 +564,9 @@ void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid) {
}
if (dirty & DIRTY_BEZIERSPLINE) {
render_->SetUniformI1(&u_spline_count_u, gstate_c.spline_count_u);
if (u_spline_count_v != -1)
render_->SetUniformI1(&u_spline_count_v, gstate_c.spline_count_v);
if (u_spline_type_u != -1)
render_->SetUniformI1(&u_spline_type_u, gstate_c.spline_type_u);
if (u_spline_type_v != -1)
render_->SetUniformI1(&u_spline_type_v, gstate_c.spline_type_v);
if (u_spline_counts != -1) {
render_->SetUniformI1(&u_spline_counts, gstate_c.spline_num_points_u);
}
}
}

View File

@ -117,13 +117,11 @@ public:
int u_lightspecular[4]; // attenuation
int u_lightambient[4]; // attenuation
int u_tess_pos_tex;
int u_tess_tex_tex;
int u_tess_col_tex;
int u_spline_count_u;
int u_spline_count_v;
int u_spline_type_u;
int u_spline_type_v;
// Spline Tessellation
int u_tess_points; // Control Points
int u_tess_weights_u;
int u_tess_weights_v;
int u_spline_counts;
};
// Real public interface

View File

@ -193,6 +193,7 @@ void GenerateVertexShader(const VShaderID &id, char *buffer, uint32_t *attrMask,
bool doSpline = id.Bit(VS_BIT_SPLINE);
bool hasColorTess = id.Bit(VS_BIT_HAS_COLOR_TESS);
bool hasTexcoordTess = id.Bit(VS_BIT_HAS_TEXCOORD_TESS);
bool hasNormalTess = id.Bit(VS_BIT_HAS_NORMAL_TESS);
bool flipNormalTess = id.Bit(VS_BIT_NORM_REVERSE_TESS);
const char *shading = "";
@ -379,83 +380,88 @@ void GenerateVertexShader(const VShaderID &id, char *buffer, uint32_t *attrMask,
if (doBezier || doSpline) {
*uniformMask |= DIRTY_BEZIERSPLINE;
WRITE(p, "uniform sampler2D u_tess_pos_tex;\n");
WRITE(p, "uniform sampler2D u_tess_tex_tex;\n");
WRITE(p, "uniform sampler2D u_tess_col_tex;\n");
WRITE(p, "uniform sampler2D u_tess_points;\n"); // Control Points
WRITE(p, "uniform sampler2D u_tess_weights_u;\n");
WRITE(p, "uniform sampler2D u_tess_weights_v;\n");
WRITE(p, "uniform int u_spline_count_u;\n");
WRITE(p, "uniform int u_spline_counts;\n");
for (int i = 2; i <= 4; i++) {
// Define 3 types vec2, vec3, vec4
WRITE(p, "vec%d tess_sample(in vec%d points[16], in vec2 weights[4]) {\n", i, i);
WRITE(p, "vec%d tess_sample(in vec%d points[16], mat4 weights) {\n", i, i);
WRITE(p, " vec%d pos = vec%d(0.0);\n", i, i);
WRITE(p, " for (int i = 0; i < 4; ++i) {\n");
WRITE(p, " for (int j = 0; j < 4; ++j) {\n");
WRITE(p, " float f = weights[j].x * weights[i].y;\n");
WRITE(p, " if (f != 0.0)\n");
WRITE(p, " pos = pos + f * points[i * 4 + j];\n");
WRITE(p, " }\n");
WRITE(p, " }\n");
for (int v = 0; v < 4; ++v) {
for (int u = 0; u < 4; ++u) {
WRITE(p, " pos += weights[%i][%i] * points[%i];\n", v, u, v * 4 + u);
}
}
WRITE(p, " return pos;\n");
WRITE(p, "}\n");
}
if (doSpline) {
WRITE(p, "uniform int u_spline_count_v;\n");
WRITE(p, "uniform int u_spline_type_u;\n");
WRITE(p, "uniform int u_spline_type_v;\n");
WRITE(p, "void spline_knot(ivec2 num_patches, ivec2 type, out vec2 knot[6], ivec2 patch_pos) {\n");
WRITE(p, " for (int i = 0; i < 6; ++i) {\n");
WRITE(p, " knot[i] = vec2(float(i + patch_pos.x - 2), float(i + patch_pos.y - 2));\n");
WRITE(p, " }\n");
WRITE(p, " if ((type.x & 1) != 0) {\n");
WRITE(p, " if (patch_pos.x <= 2)\n");
WRITE(p, " knot[0].x = 0.0;\n");
WRITE(p, " if (patch_pos.x <= 1)\n");
WRITE(p, " knot[1].x = 0.0;\n");
WRITE(p, " }\n");
WRITE(p, " if ((type.x & 2) != 0) {\n");
WRITE(p, " if (patch_pos.x >= (num_patches.x - 2))\n");
WRITE(p, " knot[5].x = float(num_patches.x);\n");
WRITE(p, " if (patch_pos.x == (num_patches.x - 1))\n");
WRITE(p, " knot[4].x = float(num_patches.x);\n");
WRITE(p, " }\n");
WRITE(p, " if ((type.y & 1) != 0) {\n");
WRITE(p, " if (patch_pos.y <= 2)\n");
WRITE(p, " knot[0].y = 0.0;\n");
WRITE(p, " if (patch_pos.y <= 1)\n");
WRITE(p, " knot[1].y = 0.0;\n");
WRITE(p, " }\n");
WRITE(p, " if ((type.y & 2) != 0) {\n");
WRITE(p, " if (patch_pos.y >= (num_patches.y - 2))\n");
WRITE(p, " knot[5].y = float(num_patches.y);\n");
WRITE(p, " if (patch_pos.y == (num_patches.y - 1))\n");
WRITE(p, " knot[4].y = float(num_patches.y);\n");
WRITE(p, " }\n");
WRITE(p, "}\n");
WRITE(p, "void spline_weight(vec2 t, in vec2 knot[6], out vec2 weights[4]) {\n");
// TODO: Maybe compilers could be coaxed into vectorizing this code without the above explicitly...
WRITE(p, " vec2 t0 = (t - knot[0]);\n");
WRITE(p, " vec2 t1 = (t - knot[1]);\n");
WRITE(p, " vec2 t2 = (t - knot[2]);\n");
// TODO: All our knots are integers so we should be able to get rid of these divisions (How?)
WRITE(p, " vec2 f30 = t0 / (knot[3] - knot[0]);\n");
WRITE(p, " vec2 f41 = t1 / (knot[4] - knot[1]);\n");
WRITE(p, " vec2 f52 = t2 / (knot[5] - knot[2]);\n");
WRITE(p, " vec2 f31 = t1 / (knot[3] - knot[1]);\n");
WRITE(p, " vec2 f42 = t2 / (knot[4] - knot[2]);\n");
WRITE(p, " vec2 f32 = t2 / (knot[3] - knot[2]);\n");
WRITE(p, " vec2 a = (1.0 - f30)*(1.0 - f31);\n");
WRITE(p, " vec2 b = (f31*f41);\n");
WRITE(p, " vec2 c = (1.0 - f41)*(1.0 - f42);\n");
WRITE(p, " vec2 d = (f42*f52);\n");
WRITE(p, " weights[0] = a - (a*f32);\n");
WRITE(p, " weights[1] = vec2(1.0) - a - b + ((a + b + c - vec2(1.0))*f32);\n");
WRITE(p, " weights[2] = b + ((vec2(1.0) - b - c - d)*f32);\n");
WRITE(p, " weights[3] = d*f32;\n");
if (!gl_extensions.VersionGEThan(3, 0, 0)) { // For glsl version 1.10
WRITE(p, "mat4 outerProduct(vec4 u, vec4 v) {\n");
WRITE(p, " return mat4(u * v[0], u * v[1], u * v[2], u * v[3]);\n");
WRITE(p, "}\n");
}
WRITE(p, "struct Tess {\n");
WRITE(p, " vec3 pos;\n");
if (doTexture)
WRITE(p, " vec2 tex;\n");
WRITE(p, " vec4 col;\n");
if (hasNormalTess)
WRITE(p, " vec3 nrm;\n");
WRITE(p, "};\n");
WRITE(p, "void tessellate(out Tess tess) {\n");
WRITE(p, " ivec2 point_pos = ivec2(position.z, normal.z)%s;\n", doBezier ? " * 3" : "");
WRITE(p, " ivec2 weight_idx = ivec2(position.xy);\n");
// Load 4x4 control points
WRITE(p, " vec3 _pos[16];\n");
WRITE(p, " vec2 _tex[16];\n");
WRITE(p, " vec4 _col[16];\n");
WRITE(p, " int index_u, index_v;\n");
for (int i = 0; i < 4; i++) {
for (int j = 0; j < 4; j++) {
WRITE(p, " index_u = (%i + point_pos.x);\n", j);
WRITE(p, " index_v = (%i + point_pos.y);\n", i);
WRITE(p, " _pos[%i] = %s(u_tess_points, ivec2(index_u, index_v), 0).xyz;\n", i * 4 + j, texelFetch);
if (doTexture && hasTexcoordTess)
WRITE(p, " _tex[%i] = %s(u_tess_points, ivec2(index_u + u_spline_counts, index_v), 0).xy;\n", i * 4 + j, texelFetch);
if (hasColorTess)
WRITE(p, " _col[%i] = %s(u_tess_points, ivec2(index_u + u_spline_counts * 2, index_v), 0).rgba;\n", i * 4 + j, texelFetch);
}
}
// Basis polynomials as weight coefficients
WRITE(p, " vec4 basis_u = %s(u_tess_weights_u, %s, 0);\n", texelFetch, "ivec2(weight_idx.x * 2, 0)");
WRITE(p, " vec4 basis_v = %s(u_tess_weights_v, %s, 0);\n", texelFetch, "ivec2(weight_idx.y * 2, 0)");
WRITE(p, " mat4 basis = outerProduct(basis_u, basis_v);\n");
// Tessellate
WRITE(p, " tess.pos = tess_sample(_pos, basis);\n");
if (doTexture) {
if (hasTexcoordTess)
WRITE(p, " tess.tex = tess_sample(_tex, basis);\n");
else
WRITE(p, " tess.tex = normal.xy;\n");
}
if (hasColorTess)
WRITE(p, " tess.col = tess_sample(_col, basis);\n");
else
WRITE(p, " tess.col = u_matambientalpha;\n");
if (hasNormalTess) {
// Derivatives as weight coefficients
WRITE(p, " vec4 deriv_u = %s(u_tess_weights_u, %s, 0);\n", texelFetch, "ivec2(weight_idx.x * 2 + 1, 0)");
WRITE(p, " vec4 deriv_v = %s(u_tess_weights_v, %s, 0);\n", texelFetch, "ivec2(weight_idx.y * 2 + 1, 0)");
WRITE(p, " vec3 du = tess_sample(_pos, outerProduct(deriv_u, basis_v));\n");
WRITE(p, " vec3 dv = tess_sample(_pos, outerProduct(basis_u, deriv_v));\n");
WRITE(p, " tess.nrm = normalize(cross(du, dv));\n");
}
WRITE(p, "}\n");
}
WRITE(p, "void main() {\n");
@ -494,101 +500,14 @@ void GenerateVertexShader(const VShaderID &id, char *buffer, uint32_t *attrMask,
} else {
// Step 1: World Transform / Skinning
if (!enableBones) {
// Hardware tessellation
if (doBezier || doSpline) {
WRITE(p, " vec3 _pos[16];\n");
WRITE(p, " vec2 _tex[16];\n");
WRITE(p, " vec4 _col[16];\n");
WRITE(p, " int num_patches_u = %s;\n", doBezier ? "(u_spline_count_u - 1) / 3" : "u_spline_count_u - 3");
WRITE(p, " int u = int(mod(float(gl_InstanceID), float(num_patches_u)));\n");
WRITE(p, " int v = gl_InstanceID / num_patches_u;\n");
WRITE(p, " ivec2 patch_pos = ivec2(u, v);\n");
WRITE(p, " for (int i = 0; i < 4; i++) {\n");
WRITE(p, " for (int j = 0; j < 4; j++) {\n");
WRITE(p, " int index = (i + v%s) * u_spline_count_u + (j + u%s);\n", doBezier ? " * 3" : "", doBezier ? " * 3" : "");
WRITE(p, " _pos[i * 4 + j] = %s(u_tess_pos_tex, ivec2(index, 0), 0).xyz;\n", texelFetch);
if (doTexture && hasTexcoord && hasTexcoordTess)
WRITE(p, " _tex[i * 4 + j] = %s(u_tess_tex_tex, ivec2(index, 0), 0).xy;\n", texelFetch);
if (hasColor && hasColorTess)
WRITE(p, " _col[i * 4 + j] = %s(u_tess_col_tex, ivec2(index, 0), 0).rgba;\n", texelFetch);
WRITE(p, " }\n");
WRITE(p, " }\n");
WRITE(p, " vec2 tess_pos = position.xy;\n");
WRITE(p, " vec2 weights[4];\n");
if (doBezier) {
// Bernstein 3D
WRITE(p, " weights[0] = (1.0 - tess_pos) * (1.0 - tess_pos) * (1.0 - tess_pos);\n");
WRITE(p, " weights[1] = 3.0 * tess_pos * (1.0 - tess_pos) * (1.0 - tess_pos);\n");
WRITE(p, " weights[2] = 3.0 * tess_pos * tess_pos * (1.0 - tess_pos);\n");
WRITE(p, " weights[3] = tess_pos * tess_pos * tess_pos;\n");
} else { // Spline
WRITE(p, " ivec2 spline_num_patches = ivec2(u_spline_count_u - 3, u_spline_count_v - 3);\n");
WRITE(p, " ivec2 spline_type = ivec2(u_spline_type_u, u_spline_type_v);\n");
WRITE(p, " vec2 knots[6];\n");
WRITE(p, " spline_knot(spline_num_patches, spline_type, knots, patch_pos);\n");
WRITE(p, " spline_weight(tess_pos + vec2(patch_pos), knots, weights);\n");
}
WRITE(p, " vec3 pos = tess_sample(_pos, weights);\n");
if (doTexture && hasTexcoord) {
if (hasTexcoordTess)
WRITE(p, " vec2 tex = tess_sample(_tex, weights);\n");
else
WRITE(p, " vec2 tex = tess_pos + vec2(patch_pos);\n");
}
if (hasColor) {
if (hasColorTess)
WRITE(p, " vec4 col = tess_sample(_col, weights);\n");
else
WRITE(p, " vec4 col = %s(u_tess_col_tex, ivec2(0, 0), 0).rgba;\n", texelFetch);
}
if (hasNormal) {
// Curved surface is probably always need to compute normal(not sampling from control points)
if (doBezier) {
// Bernstein derivative
WRITE(p, " vec2 bernderiv[4];\n");
WRITE(p, " bernderiv[0] = -3.0 * (tess_pos - 1.0) * (tess_pos - 1.0); \n");
WRITE(p, " bernderiv[1] = 9.0 * tess_pos * tess_pos - 12.0 * tess_pos + 3.0; \n");
WRITE(p, " bernderiv[2] = 3.0 * (2.0 - 3.0 * tess_pos) * tess_pos; \n");
WRITE(p, " bernderiv[3] = 3.0 * tess_pos * tess_pos; \n");
// Hardware tessellation
WRITE(p, " Tess tess;\n");
WRITE(p, " tessellate(tess);\n");
WRITE(p, " vec2 bernderiv_u[4];\n");
WRITE(p, " vec2 bernderiv_v[4];\n");
WRITE(p, " for (int i = 0; i < 4; i++) {\n");
WRITE(p, " bernderiv_u[i] = vec2(bernderiv[i].x, weights[i].y);\n");
WRITE(p, " bernderiv_v[i] = vec2(weights[i].x, bernderiv[i].y);\n");
WRITE(p, " }\n");
WRITE(p, " vec3 du = tess_sample(_pos, bernderiv_u);\n");
WRITE(p, " vec3 dv = tess_sample(_pos, bernderiv_v);\n");
} else { // Spline
WRITE(p, " vec2 tess_next_u = vec2(normal.x, 0.0);\n");
WRITE(p, " vec2 tess_next_v = vec2(0.0, normal.y);\n");
// Right
WRITE(p, " vec2 tess_pos_r = tess_pos + tess_next_u;\n");
WRITE(p, " spline_weight(tess_pos_r + vec2(patch_pos), knots, weights);\n");
WRITE(p, " vec3 pos_r = tess_sample(_pos, weights);\n");
// Left
WRITE(p, " vec2 tess_pos_l = tess_pos - tess_next_u;\n");
WRITE(p, " spline_weight(tess_pos_l + vec2(patch_pos), knots, weights);\n");
WRITE(p, " vec3 pos_l = tess_sample(_pos, weights);\n");
// Down
WRITE(p, " vec2 tess_pos_d = tess_pos + tess_next_v;\n");
WRITE(p, " spline_weight(tess_pos_d + vec2(patch_pos), knots, weights);\n");
WRITE(p, " vec3 pos_d = tess_sample(_pos, weights);\n");
// Up
WRITE(p, " vec2 tess_pos_u = tess_pos - tess_next_v;\n");
WRITE(p, " spline_weight(tess_pos_u + vec2(patch_pos), knots, weights);\n");
WRITE(p, " vec3 pos_u = tess_sample(_pos, weights);\n");
WRITE(p, " vec3 du = pos_r - pos_l;\n");
WRITE(p, " vec3 dv = pos_d - pos_u;\n");
}
WRITE(p, " vec3 nrm = cross(du, dv);\n");
WRITE(p, " nrm = normalize(nrm);\n");
}
WRITE(p, " vec3 worldpos = (u_world * vec4(pos.xyz, 1.0)).xyz;\n");
if (hasNormal) {
WRITE(p, " mediump vec3 worldnormal = normalize((u_world * vec4(%snrm, 0.0)).xyz);\n", flipNormalTess ? "-" : "");
WRITE(p, " vec3 worldpos = (u_world * vec4(tess.pos.xyz, 1.0)).xyz;\n");
if (hasNormalTess) {
WRITE(p, " mediump vec3 worldnormal = normalize((u_world * vec4(%stess.nrm, 0.0)).xyz);\n", flipNormalTess ? "-" : "");
} else {
WRITE(p, " mediump vec3 worldnormal = vec3(0.0, 0.0, 1.0);\n");
}
@ -692,9 +611,10 @@ void GenerateVertexShader(const VShaderID &id, char *buffer, uint32_t *attrMask,
const char *diffuseStr = (matUpdate & 2) && hasColor ? "color0.rgb" : "u_matdiffuse";
const char *specularStr = (matUpdate & 4) && hasColor ? "color0.rgb" : "u_matspecular.rgb";
if (doBezier || doSpline) {
ambientStr = (matUpdate & 1) && hasColor ? "col" : "u_matambientalpha";
diffuseStr = (matUpdate & 2) && hasColor ? "col.rgb" : "u_matdiffuse";
specularStr = (matUpdate & 4) && hasColor ? "col.rgb" : "u_matspecular.rgb";
// TODO: Probably, should use hasColorTess but FF4 has a problem with drawing the background.
ambientStr = (matUpdate & 1) && hasColor ? "tess.col" : "u_matambientalpha";
diffuseStr = (matUpdate & 2) && hasColor ? "tess.col.rgb" : "u_matdiffuse";
specularStr = (matUpdate & 4) && hasColor ? "tess.col.rgb" : "u_matspecular.rgb";
}
bool diffuseIsZero = true;
@ -821,7 +741,7 @@ void GenerateVertexShader(const VShaderID &id, char *buffer, uint32_t *attrMask,
// Lighting doesn't affect color.
if (hasColor) {
if (doBezier || doSpline)
WRITE(p, " v_color0 = col;\n");
WRITE(p, " v_color0 = tess.col;\n");
else
WRITE(p, " v_color0 = color0;\n");
} else {
@ -839,9 +759,7 @@ void GenerateVertexShader(const VShaderID &id, char *buffer, uint32_t *attrMask,
if (scaleUV) {
if (hasTexcoord) {
if (doBezier || doSpline)
// TODO: Need fix?
// Fix to avoid temporarily texture animation bug with hardware tessellation.
WRITE(p, " v_texcoord = vec3(tex * u_uvscaleoffset.xy + u_uvscaleoffset.zw, 0.0);\n");
WRITE(p, " v_texcoord = vec3(tess.tex * u_uvscaleoffset.xy + u_uvscaleoffset.zw, 0.0);\n");
else
WRITE(p, " v_texcoord = vec3(texcoord.xy * u_uvscaleoffset.xy, 0.0);\n");
} else {
@ -849,10 +767,7 @@ void GenerateVertexShader(const VShaderID &id, char *buffer, uint32_t *attrMask,
}
} else {
if (hasTexcoord) {
if (doBezier || doSpline)
WRITE(p, " v_texcoord = vec3(tex * u_uvscaleoffset.xy + u_uvscaleoffset.zw, 0.0);\n");
else
WRITE(p, " v_texcoord = vec3(texcoord.xy * u_uvscaleoffset.xy + u_uvscaleoffset.zw, 0.0);\n");
WRITE(p, " v_texcoord = vec3(texcoord.xy * u_uvscaleoffset.xy + u_uvscaleoffset.zw, 0.0);\n");
} else {
WRITE(p, " v_texcoord = vec3(u_uvscaleoffset.zw, 0.0);\n");
}

View File

@ -1718,8 +1718,6 @@ bail:
}
void GPUCommon::Execute_Bezier(u32 op, u32 diff) {
drawEngineCommon_->DispatchFlush();
// We don't dirty on normal changes anymore as we prescale, but it's needed for splines/bezier.
gstate_c.Dirty(DIRTY_UVSCALEOFFSET);
@ -1760,9 +1758,9 @@ void GPUCommon::Execute_Bezier(u32 op, u32 diff) {
if (CanUseHardwareTessellation(patchPrim)) {
gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE);
gstate_c.bezier = true;
if (gstate_c.spline_count_u != bz_ucount) {
if (gstate_c.spline_num_points_u != bz_ucount) {
gstate_c.Dirty(DIRTY_BEZIERSPLINE);
gstate_c.spline_count_u = bz_ucount;
gstate_c.spline_num_points_u = bz_ucount;
}
}
@ -1780,8 +1778,6 @@ void GPUCommon::Execute_Bezier(u32 op, u32 diff) {
}
void GPUCommon::Execute_Spline(u32 op, u32 diff) {
drawEngineCommon_->DispatchFlush();
// We don't dirty on normal changes anymore as we prescale, but it's needed for splines/bezier.
gstate_c.Dirty(DIRTY_UVSCALEOFFSET);
@ -1824,14 +1820,9 @@ void GPUCommon::Execute_Spline(u32 op, u32 diff) {
if (CanUseHardwareTessellation(patchPrim)) {
gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE);
gstate_c.spline = true;
bool countsChanged = gstate_c.spline_count_u != sp_ucount || gstate_c.spline_count_v != sp_vcount;
bool typesChanged = gstate_c.spline_type_u != sp_utype || gstate_c.spline_type_v != sp_vtype;
if (countsChanged || typesChanged) {
if (gstate_c.spline_num_points_u != sp_ucount) {
gstate_c.Dirty(DIRTY_BEZIERSPLINE);
gstate_c.spline_count_u = sp_ucount;
gstate_c.spline_count_v = sp_vcount;
gstate_c.spline_type_u = sp_utype;
gstate_c.spline_type_v = sp_vtype;
gstate_c.spline_num_points_u = sp_ucount;
}
}

View File

@ -603,10 +603,7 @@ struct GPUStateCache {
bool bezier;
bool spline;
int spline_count_u;
int spline_count_v;
int spline_type_u;
int spline_type_v;
int spline_num_points_u;
bool useShaderDepal;
GEBufferFormat depalFramebufferFormat;

View File

@ -102,11 +102,49 @@ float Vec3<float>::Distance2To(Vec3<float> &other)
return Vec3<float>(other-(*this)).Length2();
}
#if defined(_M_SSE)
// Returns an approximate 1 / sqrt(v0*v0 + v1*v1 + v2*v2), computed from the
// three low lanes of v and replicated into all four lanes of the result.
// Lane 3 of v does not contribute to the sum.
__m128 SSENormalizeMultiplierSSE2(__m128 v)
{
	const __m128 squared = _mm_mul_ps(v, v);

	// Move the lane-1 and lane-2 squares down to lane 0 so scalar adds can be used.
	const __m128 lane1 = _mm_shuffle_ps(squared, squared, _MM_SHUFFLE(0, 0, 0, 1));
	const __m128 lane2 = _mm_shuffle_ps(squared, squared, _MM_SHUFFLE(0, 0, 0, 2));

	// Lane 0 becomes sq2 + (sq1 + sq0); the upper lanes are don't-care values.
	const __m128 partial = _mm_add_ss(lane1, squared);
	const __m128 lengthSq = _mm_add_ss(lane2, partial);

	// Reciprocal square root estimate of lane 0, then splat it to every lane.
	const __m128 invLength = _mm_rsqrt_ss(lengthSq);
	return _mm_shuffle_ps(invLength, invLength, _MM_SHUFFLE(0, 0, 0, 0));
}
#if _M_SSE >= 0x401
// Returns an approximate 1 / sqrt(v0*v0 + v1*v1 + v2*v2) in all four lanes,
// using the SSE4.1 dot-product instruction. Matches SSENormalizeMultiplierSSE2,
// which also sums only the three low lanes.
__m128 SSENormalizeMultiplierSSE4(__m128 v)
{
	// Dot-product mask 0x7F: the high nibble (0x7) multiplies and sums lanes 0-2
	// only, the low nibble (0xF) writes the sum to every result lane.
	// A mask of 0xFF would also add lane 3 squared, giving a different result
	// from the SSE2 variant whenever that lane is non-zero.
	// NOTE(review): lane 3 of a Vec3<float>'s __m128 is presumably padding - confirm.
	return _mm_rsqrt_ps(_mm_dp_ps(v, v, 0x7F));
}
// Runtime dispatch for the normalize multiplier: uses the SSE4.1 variant when
// useSSE4 is true, otherwise the SSE2 variant.
__m128 SSENormalizeMultiplier(bool useSSE4, __m128 v)
{
	return useSSE4 ? SSENormalizeMultiplierSSE4(v) : SSENormalizeMultiplierSSE2(v);
}
#else
// Build without the SSE4.1 variant: always uses the SSE2 implementation.
__m128 SSENormalizeMultiplier(bool useSSE4, __m128 v)
{
	// The flag is accepted for signature compatibility but has no effect here.
	(void)useSSE4;
	return SSENormalizeMultiplierSSE2(v);
}
#endif
template<>
Vec3<float> Vec3<float>::Normalized() const
Vec3<float> Vec3<float>::Normalized(bool useSSE4) const
{
const __m128 normalize = SSENormalizeMultiplier(useSSE4, vec);
return _mm_mul_ps(normalize, vec);
}
#else
// Non-SSE build: returns this vector divided by its length.
// useSSE4 is ignored on this path.
template<>
Vec3<float> Vec3<float>::Normalized(bool useSSE4) const
{
	const auto length = Length();
	return (*this) / length;
}
#endif
template<>
float Vec3<float>::Normalize()

View File

@ -25,6 +25,9 @@
#if defined(_M_SSE)
#include <emmintrin.h>
#if _M_SSE >= 0x401
#include <smmintrin.h>
#endif
#endif
namespace Math3D {
@ -177,8 +180,6 @@ public:
const Vec2 ts() const { return Vec2(y, x); }
};
typedef Vec2<float> Vec2f;
template<typename T>
class Vec3Packed;
@ -295,7 +296,7 @@ public:
void SetLength(const float l);
Vec3 WithLength(const float l) const;
float Distance2To(Vec3 &other);
Vec3 Normalized() const;
Vec3 Normalized(bool useSSE4 = false) const;
float Normalize(); // returns the previous length, which is often useful
T& operator [] (int i) //allow vector[2] = 3 (vector.z=3)
@ -817,6 +818,7 @@ private:
}; // namespace Math3D
typedef Math3D::Vec2<float> Vec2f;
typedef Math3D::Vec3<float> Vec3f;
typedef Math3D::Vec3Packed<float> Vec3Packedf;
typedef Math3D::Vec4<float> Vec4f;
@ -1082,6 +1084,69 @@ __forceinline void Vec4<T>::ToRGBA(u8 *rgba) const
*(u32 *)rgba = ToRGBA();
}
#if defined(_M_SSE)
// Specialized for SIMD optimization
// Vec3<float> operation
// SSE specialization: in-place add of other, applied to all four lanes of vec.
template<>
inline void Vec3<float>::operator += (const Vec3<float> &other)
{
	const __m128 sum = _mm_add_ps(vec, other.vec);
	vec = sum;
}
// SSE specialization: returns the lane-wise sum of this vector and other.
template<>
inline Vec3<float> Vec3<float>::operator + (const Vec3 &other) const
{
	const __m128 sum = _mm_add_ps(vec, other.vec);
	return Vec3<float>(sum);
}
// SSE specialization: returns the lane-wise (component-wise) product with other.
template<>
inline Vec3<float> Vec3<float>::operator * (const Vec3 &other) const
{
	const __m128 product = _mm_mul_ps(vec, other.vec);
	return Vec3<float>(product);
}
// SSE specialization: returns this vector with every lane multiplied by the scalar other.
template<> template<>
inline Vec3<float> Vec3<float>::operator * (const float &other) const
{
	// Replicate the scalar into all four lanes, then multiply lane-wise.
	const __m128 factor = _mm_set_ps1(other);
	const __m128 scaled = _mm_mul_ps(vec, factor);
	return Vec3<float>(scaled);
}
// Vec4<float> operation
// SSE specialization: in-place lane-wise add of other.
template<>
inline void Vec4<float>::operator += (const Vec4<float> &other)
{
	const __m128 sum = _mm_add_ps(vec, other.vec);
	vec = sum;
}
// SSE specialization: returns the lane-wise sum of this vector and other.
template<>
inline Vec4<float> Vec4<float>::operator + (const Vec4 &other) const
{
	const __m128 sum = _mm_add_ps(vec, other.vec);
	return Vec4<float>(sum);
}
// SSE specialization: returns the lane-wise (component-wise) product with other.
template<>
inline Vec4<float> Vec4<float>::operator * (const Vec4 &other) const
{
	const __m128 product = _mm_mul_ps(vec, other.vec);
	return Vec4<float>(product);
}
// SSE specialization: returns this vector with every lane multiplied by the scalar other.
template<> template<>
inline Vec4<float> Vec4<float>::operator * (const float &other) const
{
	// Replicate the scalar into all four lanes, then multiply lane-wise.
	const __m128 factor = _mm_set_ps1(other);
	const __m128 scaled = _mm_mul_ps(vec, factor);
	return Vec4<float>(scaled);
}
// Vec3<float> cross product
// SSE specialization of the cross product a x b.
// Assumes lanes 0-2 of vec hold x, y, z - TODO confirm against the Vec3 layout.
template<>
inline Vec3<float> Cross(const Vec3<float> &a, const Vec3<float> &b)
{
	// Rotated copies of each input: "120" = lanes (1, 2, 0, 3), "201" = lanes (2, 0, 1, 3).
	const __m128 a120 = _mm_shuffle_ps(a.vec, a.vec, _MM_SHUFFLE(3, 0, 2, 1));
	const __m128 b201 = _mm_shuffle_ps(b.vec, b.vec, _MM_SHUFFLE(3, 1, 0, 2));
	const __m128 a201 = _mm_shuffle_ps(a.vec, a.vec, _MM_SHUFFLE(3, 1, 0, 2));
	const __m128 b120 = _mm_shuffle_ps(b.vec, b.vec, _MM_SHUFFLE(3, 0, 2, 1));

	// Lane i of the result is a[i+1]*b[i+2] - a[i+2]*b[i+1] (indices mod 3);
	// lane 3 is a3*b3 - a3*b3.
	const __m128 left = _mm_mul_ps(a120, b201);
	const __m128 right = _mm_mul_ps(a201, b120);
	return _mm_sub_ps(left, right);
}
#endif
}; // namespace Math3D
// linear interpolation via float: 0.0=begin, 1.0=end

View File

@ -42,13 +42,11 @@ SoftwareDrawEngine::SoftwareDrawEngine() {
// All this is a LOT of memory, need to see if we can cut down somehow. Used for splines.
decoded = (u8 *)AllocateMemoryPages(DECODED_VERTEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
decIndex = (u16 *)AllocateMemoryPages(DECODED_INDEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
splineBuffer = (u8 *)AllocateMemoryPages(SPLINE_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
}
SoftwareDrawEngine::~SoftwareDrawEngine() {
FreeMemoryPages(decoded, DECODED_VERTEX_BUFFER_SIZE);
FreeMemoryPages(decIndex, DECODED_INDEX_BUFFER_SIZE);
FreeMemoryPages(splineBuffer, SPLINE_BUFFER_SIZE);
}
void SoftwareDrawEngine::DispatchFlush() {
@ -280,7 +278,7 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveTy
u16 index_lower_bound = 0;
u16 index_upper_bound = vertex_count - 1;
IndexConverter idxConv(vertex_type, indices);
IndexConverter ConvertIndex(vertex_type, indices);
if (indices)
GetIndexBounds(indices, vertex_count, vertex_type, &index_lower_bound, &index_upper_bound);
@ -321,7 +319,7 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveTy
{
for (int vtx = 0; vtx < vertex_count; ++vtx) {
if (indices) {
vreader.Goto(idxConv.convert(vtx) - index_lower_bound);
vreader.Goto(ConvertIndex(vtx) - index_lower_bound);
} else {
vreader.Goto(vtx);
}
@ -380,7 +378,7 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveTy
int skip_count = data_index == 0 ? 1 : 0;
for (int vtx = 0; vtx < vertex_count; ++vtx) {
if (indices) {
vreader.Goto(idxConv.convert(vtx) - index_lower_bound);
vreader.Goto(ConvertIndex(vtx) - index_lower_bound);
} else {
vreader.Goto(vtx);
}
@ -410,7 +408,7 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveTy
for (int vtx = 0; vtx < vertex_count; ++vtx) {
if (indices) {
vreader.Goto(idxConv.convert(vtx) - index_lower_bound);
vreader.Goto(ConvertIndex(vtx) - index_lower_bound);
} else {
vreader.Goto(vtx);
}
@ -452,7 +450,7 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveTy
// Only read the central vertex if we're not continuing.
if (data_index == 0) {
if (indices) {
vreader.Goto(idxConv.convert(0) - index_lower_bound);
vreader.Goto(ConvertIndex(0) - index_lower_bound);
} else {
vreader.Goto(0);
}
@ -463,7 +461,7 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveTy
for (int vtx = start_vtx; vtx < vertex_count; ++vtx) {
if (indices) {
vreader.Goto(idxConv.convert(vtx) - index_lower_bound);
vreader.Goto(ConvertIndex(vtx) - index_lower_bound);
} else {
vreader.Goto(vtx);
}

View File

@ -69,6 +69,8 @@ enum {
DRAW_BINDING_DYNUBO_LIGHT = 4,
DRAW_BINDING_DYNUBO_BONE = 5,
DRAW_BINDING_TESS_STORAGE_BUF = 6,
DRAW_BINDING_TESS_STORAGE_BUF_WU = 7,
DRAW_BINDING_TESS_STORAGE_BUF_WV = 8,
};
enum {
@ -87,7 +89,6 @@ DrawEngineVulkan::DrawEngineVulkan(VulkanContext *vulkan, Draw::DrawContext *dra
// All this is a LOT of memory, need to see if we can cut down somehow.
decoded = (u8 *)AllocateMemoryPages(DECODED_VERTEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
decIndex = (u16 *)AllocateMemoryPages(DECODED_INDEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
splineBuffer = (u8 *)AllocateMemoryPages(SPLINE_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
indexGen.Setup(decIndex);
@ -96,7 +97,7 @@ DrawEngineVulkan::DrawEngineVulkan(VulkanContext *vulkan, Draw::DrawContext *dra
void DrawEngineVulkan::InitDeviceObjects() {
// All resources we need for PSP drawing. Usually only bindings 0 and 2-4 are populated.
VkDescriptorSetLayoutBinding bindings[7]{};
VkDescriptorSetLayoutBinding bindings[9]{};
bindings[0].descriptorCount = 1;
bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
bindings[0].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
@ -126,6 +127,14 @@ void DrawEngineVulkan::InitDeviceObjects() {
bindings[6].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
bindings[6].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
bindings[6].binding = DRAW_BINDING_TESS_STORAGE_BUF;
bindings[7].descriptorCount = 1;
bindings[7].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
bindings[7].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
bindings[7].binding = DRAW_BINDING_TESS_STORAGE_BUF_WU;
bindings[8].descriptorCount = 1;
bindings[8].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
bindings[8].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
bindings[8].binding = DRAW_BINDING_TESS_STORAGE_BUF_WV;
VkDevice device = vulkan_->GetDevice();
@ -167,13 +176,13 @@ void DrawEngineVulkan::InitDeviceObjects() {
vertexCache_ = new VulkanPushBuffer(vulkan_, VERTEX_CACHE_SIZE);
tessDataTransfer = new TessellationDataTransferVulkan(vulkan_);
tessDataTransferVulkan = new TessellationDataTransferVulkan(vulkan_);
tessDataTransfer = tessDataTransferVulkan;
}
DrawEngineVulkan::~DrawEngineVulkan() {
FreeMemoryPages(decoded, DECODED_VERTEX_BUFFER_SIZE);
FreeMemoryPages(decIndex, DECODED_INDEX_BUFFER_SIZE);
FreeMemoryPages(splineBuffer, SPLINE_BUFFER_SIZE);
DestroyDeviceObjects();
}
@ -201,8 +210,8 @@ void DrawEngineVulkan::FrameData::Destroy(VulkanContext *vulkan) {
}
void DrawEngineVulkan::DestroyDeviceObjects() {
delete tessDataTransfer;
tessDataTransfer = nullptr;
delete tessDataTransferVulkan;
tessDataTransfer = tessDataTransferVulkan = nullptr;
for (int i = 0; i < VulkanContext::MAX_INFLIGHT_FRAMES; i++) {
frame_[i].Destroy(vulkan_);
@ -258,7 +267,7 @@ void DrawEngineVulkan::BeginFrame() {
frame->pushIndex->Begin(vulkan_);
// TODO: How can we make this nicer...
((TessellationDataTransferVulkan *)tessDataTransfer)->SetPushBuffer(frame->pushUBO);
tessDataTransferVulkan->SetPushBuffer(frame->pushUBO);
DirtyAllUBOs();
@ -470,23 +479,32 @@ VkDescriptorSet DrawEngineVulkan::GetOrCreateDescriptorSet(VkImageView imageView
n++;
}
// Tessellation data buffer. Make sure this is declared outside the if to avoid optimizer
// shenanigans.
VkDescriptorBufferInfo tess_buf{};
// Tessellation data buffer.
if (tess) {
VkBuffer buf;
VkDeviceSize offset;
VkDeviceSize range;
((TessellationDataTransferVulkan *)tessDataTransfer)->GetBufferAndOffset(&buf, &offset, &range);
assert(buf);
tess_buf.buffer = buf;
tess_buf.offset = offset;
tess_buf.range = range;
tessOffset_ = offset;
const VkDescriptorBufferInfo *bufInfo = tessDataTransferVulkan->GetBufferInfo();
// Control Points
writes[n].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
writes[n].pNext = nullptr;
writes[n].dstBinding = DRAW_BINDING_TESS_STORAGE_BUF;
writes[n].pBufferInfo = &tess_buf;
writes[n].pBufferInfo = &bufInfo[0];
writes[n].descriptorCount = 1;
writes[n].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
writes[n].dstSet = desc;
n++;
// Weights U
writes[n].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
writes[n].pNext = nullptr;
writes[n].dstBinding = DRAW_BINDING_TESS_STORAGE_BUF_WU;
writes[n].pBufferInfo = &bufInfo[1];
writes[n].descriptorCount = 1;
writes[n].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
writes[n].dstSet = desc;
n++;
// Weights V
writes[n].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
writes[n].pNext = nullptr;
writes[n].dstBinding = DRAW_BINDING_TESS_STORAGE_BUF_WV;
writes[n].pBufferInfo = &bufInfo[2];
writes[n].descriptorCount = 1;
writes[n].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
writes[n].dstSet = desc;
@ -825,8 +843,7 @@ void DrawEngineVulkan::DoFlush() {
if (useElements) {
if (!ibuf)
ibOffset = (uint32_t)frame->pushIndex->Push(decIndex, sizeof(uint16_t) * indexGen.VertexCount(), &ibuf);
int numInstances = tess ? numPatches : 1;
renderManager->DrawIndexed(pipelineLayout_, ds, ARRAY_SIZE(dynamicUBOOffsets), dynamicUBOOffsets, vbuf, vbOffset, ibuf, ibOffset, vertexCount, numInstances, VK_INDEX_TYPE_UINT16);
renderManager->DrawIndexed(pipelineLayout_, ds, ARRAY_SIZE(dynamicUBOOffsets), dynamicUBOOffsets, vbuf, vbOffset, ibuf, ibOffset, vertexCount, 1, VK_INDEX_TYPE_UINT16);
} else {
renderManager->Draw(pipelineLayout_, ds, ARRAY_SIZE(dynamicUBOOffsets), dynamicUBOOffsets, vbuf, vbOffset, vertexCount);
}
@ -994,16 +1011,7 @@ void DrawEngineVulkan::UpdateUBOs(FrameData *frame) {
}
}
DrawEngineVulkan::TessellationDataTransferVulkan::TessellationDataTransferVulkan(VulkanContext *vulkan)
: TessellationDataTransfer(), vulkan_(vulkan) {
}
DrawEngineVulkan::TessellationDataTransferVulkan::~TessellationDataTransferVulkan() {
}
void DrawEngineVulkan::TessellationDataTransferVulkan::PrepareBuffers(float *&pos, float *&tex, float *&col, int &posStride, int &texStride, int &colStride, int size, bool hasColor, bool hasTexCoords) {
colStride = 4;
void TessellationDataTransferVulkan::SendDataToShader(const SimpleVertex *const *points, int size_u, int size_v, u32 vertType, const Spline::Weight2D &weights) {
// SSBOs that are not simply float1 or float2 need to be padded up to a float4 size. vec3 members
// also need to be 16-byte aligned, hence the padding.
struct TessData {
@ -1012,18 +1020,28 @@ void DrawEngineVulkan::TessellationDataTransferVulkan::PrepareBuffers(float *&po
float color[4];
};
int size = size_u * size_v;
int ssboAlignment = vulkan_->GetPhysicalDeviceProperties(vulkan_->GetCurrentPhysicalDevice()).limits.minStorageBufferOffsetAlignment;
uint8_t *data = (uint8_t *)push_->PushAligned(size * sizeof(TessData), &offset_, &buf_, ssboAlignment);
range_ = size * sizeof(TessData);
uint8_t *data = (uint8_t *)push_->PushAligned(size * sizeof(TessData), (uint32_t *)&bufInfo_[0].offset, &bufInfo_[0].buffer, ssboAlignment);
bufInfo_[0].range = size * sizeof(TessData);
pos = (float *)(data);
tex = (float *)(data + offsetof(TessData, uv));
col = (float *)(data + offsetof(TessData, color));
posStride = sizeof(TessData) / sizeof(float);
colStride = hasColor ? (sizeof(TessData) / sizeof(float)) : 0;
texStride = sizeof(TessData) / sizeof(float);
}
float *pos = (float *)(data);
float *tex = (float *)(data + offsetof(TessData, uv));
float *col = (float *)(data + offsetof(TessData, color));
int stride = sizeof(TessData) / sizeof(float);
void DrawEngineVulkan::TessellationDataTransferVulkan::SendDataToShader(const float *pos, const float *tex, const float *col, int size, bool hasColor, bool hasTexCoords) {
// Nothing to do here! The caller will write directly to the pushbuffer through the pointers it got through PrepareBuffers.
CopyControlPoints(pos, tex, col, stride, stride, stride, points, size, vertType);
using Spline::Weight;
// Weights U
data = (uint8_t *)push_->PushAligned(weights.size_u * sizeof(Weight), (uint32_t *)&bufInfo_[1].offset, &bufInfo_[1].buffer, ssboAlignment);
memcpy(data, weights.u, weights.size_u * sizeof(Weight));
bufInfo_[1].range = weights.size_u * sizeof(Weight);
// Weights V
data = (uint8_t *)push_->PushAligned(weights.size_v * sizeof(Weight), (uint32_t *)&bufInfo_[2].offset, &bufInfo_[2].buffer, ssboAlignment);
memcpy(data, weights.v, weights.size_v * sizeof(Weight));
bufInfo_[2].range = weights.size_v * sizeof(Weight);
}

View File

@ -117,6 +117,20 @@ public:
class VulkanRenderManager;
// Vulkan implementation of TessellationDataTransfer. Uploads spline/bezier
// control points and weights through a VulkanPushBuffer and exposes the
// resulting buffer ranges as descriptor buffer infos.
class TessellationDataTransferVulkan : public TessellationDataTransfer {
public:
	TessellationDataTransferVulkan(VulkanContext *vulkan) : vulkan_(vulkan) {}

	// Sets the push buffer that SendDataToShader writes into.
	void SetPushBuffer(VulkanPushBuffer *push) { push_ = push; }
	// Send spline/bezier's control points and weights to vertex shader through structured shader buffer.
	void SendDataToShader(const SimpleVertex *const *points, int size_u, int size_v, u32 vertType, const Spline::Weight2D &weights) override;
	// Returns a pointer to the first of three buffer infos filled in by
	// SendDataToShader: [0] control points, [1] weights U, [2] weights V.
	const VkDescriptorBufferInfo *GetBufferInfo() { return bufInfo_; }
private:
	VulkanContext *vulkan_;
	// Updated each frame. Starts out null so the pointer is never indeterminate
	// before the first SetPushBuffer call.
	VulkanPushBuffer *push_ = nullptr;
	VkDescriptorBufferInfo bufInfo_[3]{};
};
// Handles transform, lighting and drawing.
class DrawEngineVulkan : public DrawEngineCommon {
public:
@ -278,31 +292,5 @@ private:
int tessOffset_ = 0;
// Hardware tessellation
class TessellationDataTransferVulkan : public TessellationDataTransfer {
public:
TessellationDataTransferVulkan(VulkanContext *vulkan);
~TessellationDataTransferVulkan();
void SetPushBuffer(VulkanPushBuffer *push) { push_ = push; }
void SendDataToShader(const float *pos, const float *tex, const float *col, int size, bool hasColor, bool hasTexCoords) override;
void PrepareBuffers(float *&pos, float *&tex, float *&col, int &posStride, int &texStride, int &colStride, int size, bool hasColor, bool hasTexCoords) override;
void GetBufferAndOffset(VkBuffer *buf, VkDeviceSize *offset, VkDeviceSize *range) {
*buf = buf_;
*offset = (VkDeviceSize)offset_;
*range = (VkDeviceSize)range_;
buf_ = 0;
offset_ = 0;
range_ = 0;
}
private:
VulkanContext *vulkan_;
VulkanPushBuffer *push_; // Updated each frame.
uint32_t offset_ = 0;
uint32_t range_ = 0;
VkBuffer buf_ = VK_NULL_HANDLE;
};
TessellationDataTransferVulkan *tessDataTransferVulkan;
};

View File

@ -133,6 +133,7 @@ bool GenerateVulkanGLSLVertexShader(const VShaderID &id, char *buffer) {
bool doSpline = id.Bit(VS_BIT_SPLINE);
bool hasColorTess = id.Bit(VS_BIT_HAS_COLOR_TESS);
bool hasTexcoordTess = id.Bit(VS_BIT_HAS_TEXCOORD_TESS);
bool hasNormalTess = id.Bit(VS_BIT_HAS_NORMAL_TESS);
bool flipNormalTess = id.Bit(VS_BIT_NORM_REVERSE_TESS);
WRITE(p, "\n");
@ -219,78 +220,90 @@ bool GenerateVulkanGLSLVertexShader(const VShaderID &id, char *buffer) {
WRITE(p, " vec4 pos;\n");
WRITE(p, " vec4 uv;\n");
WRITE(p, " vec4 color;\n");
WRITE(p, "};");
WRITE(p, "};\n");
WRITE(p, "layout (std430, set = 0, binding = 6) readonly buffer s_tess_data {\n");
WRITE(p, " TessData data[];");
WRITE(p, " TessData data[];\n");
WRITE(p, "} tess_data;\n");
WRITE(p, "layout (std430) struct TessWeight {\n");
WRITE(p, " vec4 basis;\n");
WRITE(p, " vec4 deriv;\n");
WRITE(p, "};\n");
WRITE(p, "layout (std430, set = 0, binding = 7) readonly buffer s_tess_weights_u {\n");
WRITE(p, " TessWeight data[];\n");
WRITE(p, "} tess_weights_u;\n");
WRITE(p, "layout (std430, set = 0, binding = 8) readonly buffer s_tess_weights_v {\n");
WRITE(p, " TessWeight data[];\n");
WRITE(p, "} tess_weights_v;\n");
for (int i = 2; i <= 4; i++) {
// Define 3 types vec2, vec3, vec4
WRITE(p, "vec%d tess_sample(in vec%d points[16], in vec2 weights[4]) {\n", i, i);
WRITE(p, " vec%d pos = vec%d(0);\n", i, i);
WRITE(p, " for (int i = 0; i < 4; ++i) {\n");
WRITE(p, " for (int j = 0; j < 4; ++j) {\n");
WRITE(p, " float f = weights[j].x * weights[i].y;\n");
WRITE(p, " if (f != 0)\n");
WRITE(p, " pos = pos + f * points[i * 4 + j];\n");
WRITE(p, " }\n");
WRITE(p, " }\n");
WRITE(p, "vec%d tess_sample(in vec%d points[16], mat4 weights) {\n", i, i);
WRITE(p, " vec%d pos = vec%d(0.0);\n", i, i);
for (int v = 0; v < 4; ++v) {
for (int u = 0; u < 4; ++u) {
WRITE(p, " pos += weights[%i][%i] * points[%i];\n", v, u, v * 4 + u);
}
}
WRITE(p, " return pos;\n");
WRITE(p, "}\n");
}
if (doSpline) {
WRITE(p, "void spline_knot(ivec2 num_patches, ivec2 type, out vec2 knot[6], ivec2 patch_pos) {\n");
WRITE(p, " for (int i = 0; i < 6; ++i) {\n");
WRITE(p, " knot[i] = vec2(i + patch_pos.x - 2, i + patch_pos.y - 2);\n");
WRITE(p, " }\n");
WRITE(p, " if ((type.x & 1) != 0) {\n");
WRITE(p, " if (patch_pos.x <= 2)\n");
WRITE(p, " knot[0].x = 0;\n");
WRITE(p, " if (patch_pos.x <= 1)\n");
WRITE(p, " knot[1].x = 0;\n");
WRITE(p, " }\n");
WRITE(p, " if ((type.x & 2) != 0) {\n");
WRITE(p, " if (patch_pos.x >= (num_patches.x - 2))\n");
WRITE(p, " knot[5].x = num_patches.x;\n");
WRITE(p, " if (patch_pos.x == (num_patches.x - 1))\n");
WRITE(p, " knot[4].x = num_patches.x;\n");
WRITE(p, " }\n");
WRITE(p, " if ((type.y & 1) != 0) {\n");
WRITE(p, " if (patch_pos.y <= 2)\n");
WRITE(p, " knot[0].y = 0;\n");
WRITE(p, " if (patch_pos.y <= 1)\n");
WRITE(p, " knot[1].y = 0;\n");
WRITE(p, " }\n");
WRITE(p, " if ((type.y & 2) != 0) {\n");
WRITE(p, " if (patch_pos.y >= (num_patches.y - 2))\n");
WRITE(p, " knot[5].y = num_patches.y;\n");
WRITE(p, " if (patch_pos.y == (num_patches.y - 1))\n");
WRITE(p, " knot[4].y = num_patches.y;\n");
WRITE(p, " }\n");
WRITE(p, "}\n");
WRITE(p, "void spline_weight(vec2 t, in vec2 knot[6], out vec2 weights[4]) {\n");
// TODO: Maybe compilers could be coaxed into vectorizing this code without the above explicitly...
WRITE(p, " vec2 t0 = (t - knot[0]);\n");
WRITE(p, " vec2 t1 = (t - knot[1]);\n");
WRITE(p, " vec2 t2 = (t - knot[2]);\n");
// TODO: All our knots are integers so we should be able to get rid of these divisions (How?)
WRITE(p, " vec2 f30 = t0 / (knot[3] - knot[0]);\n");
WRITE(p, " vec2 f41 = t1 / (knot[4] - knot[1]);\n");
WRITE(p, " vec2 f52 = t2 / (knot[5] - knot[2]);\n");
WRITE(p, " vec2 f31 = t1 / (knot[3] - knot[1]);\n");
WRITE(p, " vec2 f42 = t2 / (knot[4] - knot[2]);\n");
WRITE(p, " vec2 f32 = t2 / (knot[3] - knot[2]);\n");
WRITE(p, " vec2 a = (1 - f30)*(1 - f31);\n");
WRITE(p, " vec2 b = (f31*f41);\n");
WRITE(p, " vec2 c = (1 - f41)*(1 - f42);\n");
WRITE(p, " vec2 d = (f42*f52);\n");
WRITE(p, " weights[0] = a - (a*f32);\n");
WRITE(p, " weights[1] = 1 - a - b + ((a + b + c - 1)*f32);\n");
WRITE(p, " weights[2] = b + ((1 - b - c - d)*f32);\n");
WRITE(p, " weights[3] = d*f32;\n");
WRITE(p, "}\n");
WRITE(p, "struct Tess {\n");
WRITE(p, " vec3 pos;\n");
if (doTexture)
WRITE(p, " vec2 tex;\n");
WRITE(p, " vec4 col;\n");
if (hasNormalTess)
WRITE(p, " vec3 nrm;\n");
WRITE(p, "};\n");
WRITE(p, "void tessellate(out Tess tess) {\n");
WRITE(p, " ivec2 point_pos = ivec2(position.z, normal.z)%s;\n", doBezier ? " * 3" : "");
WRITE(p, " ivec2 weight_idx = ivec2(position.xy);\n");
// Load 4x4 control points
WRITE(p, " vec3 _pos[16];\n");
WRITE(p, " vec2 _tex[16];\n");
WRITE(p, " vec4 _col[16];\n");
WRITE(p, " int index;\n");
for (int i = 0; i < 4; i++) {
for (int j = 0; j < 4; j++) {
WRITE(p, " index = (%i + point_pos.y) * int(base.spline_counts) + (%i + point_pos.x);\n", i, j);
WRITE(p, " _pos[%i] = tess_data.data[index].pos.xyz;\n", i * 4 + j);
if (doTexture && hasTexcoordTess)
WRITE(p, " _tex[%i] = tess_data.data[index].uv.xy;\n", i * 4 + j);
if (hasColorTess)
WRITE(p, " _col[%i] = tess_data.data[index].color;\n", i * 4 + j);
}
}
// Basis polynomials as weight coefficients
WRITE(p, " vec4 basis_u = tess_weights_u.data[weight_idx.x].basis;\n");
WRITE(p, " vec4 basis_v = tess_weights_v.data[weight_idx.y].basis;\n");
WRITE(p, " mat4 basis = outerProduct(basis_u, basis_v);\n");
// Tessellate
WRITE(p, " tess.pos = tess_sample(_pos, basis);\n");
if (doTexture) {
if (hasTexcoordTess)
WRITE(p, " tess.tex = tess_sample(_tex, basis);\n");
else
WRITE(p, " tess.tex = normal.xy;\n");
}
if (hasColorTess)
WRITE(p, " tess.col = tess_sample(_col, basis);\n");
else
WRITE(p, " tess.col = base.matambientalpha;\n");
if (hasNormalTess) {
// Derivatives as weight coefficients
WRITE(p, " vec4 deriv_u = tess_weights_u.data[weight_idx.x].deriv;\n");
WRITE(p, " vec4 deriv_v = tess_weights_v.data[weight_idx.y].deriv;\n");
WRITE(p, " vec3 du = tess_sample(_pos, outerProduct(deriv_u, basis_v));\n");
WRITE(p, " vec3 dv = tess_sample(_pos, outerProduct(basis_u, deriv_v));\n");
WRITE(p, " tess.nrm = normalize(cross(du, dv));\n");
}
WRITE(p, "}\n");
}
WRITE(p, "void main() {\n");
@ -330,103 +343,13 @@ bool GenerateVulkanGLSLVertexShader(const VShaderID &id, char *buffer) {
// Step 1: World Transform / Skinning
if (!enableBones) {
if (doBezier || doSpline) {
WRITE(p, " vec3 _pos[16];\n");
WRITE(p, " vec2 _tex[16];\n");
WRITE(p, " vec4 _col[16];\n");
WRITE(p, " int spline_count_u = int(base.spline_counts & 0xff);\n");
WRITE(p, " int spline_count_v = int((base.spline_counts >> 8) & 0xff);\n");
WRITE(p, " int num_patches_u = %s;\n", doBezier ? "(spline_count_u - 1) / 3" : "spline_count_u - 3");
WRITE(p, " int u = int(mod(gl_InstanceIndex, num_patches_u));\n");
WRITE(p, " int v = gl_InstanceIndex / num_patches_u;\n");
WRITE(p, " ivec2 patch_pos = ivec2(u, v);\n");
WRITE(p, " for (int i = 0; i < 4; i++) {\n");
WRITE(p, " for (int j = 0; j < 4; j++) {\n");
WRITE(p, " int idx = (i + v%s) * spline_count_u + (j + u%s);\n", doBezier ? " * 3" : "", doBezier ? " * 3" : "");
WRITE(p, " _pos[i * 4 + j] = tess_data.data[idx].pos.xyz;\n");
if (doTexture && hasTexcoord && hasTexcoordTess)
WRITE(p, " _tex[i * 4 + j] = tess_data.data[idx].uv.xy;\n");
if (hasColor && hasColorTess)
WRITE(p, " _col[i * 4 + j] = tess_data.data[idx].color;\n");
WRITE(p, " }\n");
WRITE(p, " }\n");
WRITE(p, " vec2 tess_pos = position.xy;\n");
WRITE(p, " vec2 weights[4];\n");
if (doBezier) {
// Bernstein 3D
WRITE(p, " weights[0] = (1 - tess_pos) * (1 - tess_pos) * (1 - tess_pos);\n");
WRITE(p, " weights[1] = 3 * tess_pos * (1 - tess_pos) * (1 - tess_pos);\n");
WRITE(p, " weights[2] = 3 * tess_pos * tess_pos * (1 - tess_pos);\n");
WRITE(p, " weights[3] = tess_pos * tess_pos * tess_pos;\n");
} else { // Spline
WRITE(p, " ivec2 spline_num_patches = ivec2(spline_count_u - 3, spline_count_v - 3);\n");
WRITE(p, " int spline_type_u = int((base.spline_counts >> 16) & 0xff);\n");
WRITE(p, " int spline_type_v = int((base.spline_counts >> 24) & 0xff);\n");
WRITE(p, " ivec2 spline_type = ivec2(spline_type_u, spline_type_v);\n");
WRITE(p, " vec2 knots[6];\n");
WRITE(p, " spline_knot(spline_num_patches, spline_type, knots, patch_pos);\n");
WRITE(p, " spline_weight(tess_pos + patch_pos, knots, weights);\n");
}
WRITE(p, " vec3 pos = tess_sample(_pos, weights);\n");
if (doTexture && hasTexcoord) {
if (hasTexcoordTess)
WRITE(p, " vec2 tex = tess_sample(_tex, weights);\n");
else
WRITE(p, " vec2 tex = tess_pos + patch_pos;\n");
}
if (hasColor) {
if (hasColorTess)
WRITE(p, " vec4 col = tess_sample(_col, weights);\n");
else
WRITE(p, " vec4 col = tess_data.data[0].color;\n");
}
if (hasNormal) {
// Curved surface is probably always need to compute normal(not sampling from control points)
if (doBezier) {
// Bernstein derivative
WRITE(p, " vec2 bernderiv[4];\n");
WRITE(p, " bernderiv[0] = -3 * (tess_pos - 1) * (tess_pos - 1); \n");
WRITE(p, " bernderiv[1] = 9 * tess_pos * tess_pos - 12 * tess_pos + 3; \n");
WRITE(p, " bernderiv[2] = 3 * (2 - 3 * tess_pos) * tess_pos; \n");
WRITE(p, " bernderiv[3] = 3 * tess_pos * tess_pos; \n");
// Hardware tessellation
WRITE(p, " Tess tess;\n");
WRITE(p, " tessellate(tess);\n");
WRITE(p, " vec2 bernderiv_u[4];\n");
WRITE(p, " vec2 bernderiv_v[4];\n");
WRITE(p, " for (int i = 0; i < 4; i++) {\n");
WRITE(p, " bernderiv_u[i] = vec2(bernderiv[i].x, weights[i].y);\n");
WRITE(p, " bernderiv_v[i] = vec2(weights[i].x, bernderiv[i].y);\n");
WRITE(p, " }\n");
WRITE(p, " vec3 du = tess_sample(_pos, bernderiv_u);\n");
WRITE(p, " vec3 dv = tess_sample(_pos, bernderiv_v);\n");
} else { // Spline
WRITE(p, " vec2 tess_next_u = vec2(normal.x, 0);\n");
WRITE(p, " vec2 tess_next_v = vec2(0, normal.y);\n");
// Right
WRITE(p, " vec2 tess_pos_r = tess_pos + tess_next_u;\n");
WRITE(p, " spline_weight(tess_pos_r + patch_pos, knots, weights);\n");
WRITE(p, " vec3 pos_r = tess_sample(_pos, weights);\n");
// Left
WRITE(p, " vec2 tess_pos_l = tess_pos - tess_next_u;\n");
WRITE(p, " spline_weight(tess_pos_l + patch_pos, knots, weights);\n");
WRITE(p, " vec3 pos_l = tess_sample(_pos, weights);\n");
// Down
WRITE(p, " vec2 tess_pos_d = tess_pos + tess_next_v;\n");
WRITE(p, " spline_weight(tess_pos_d + patch_pos, knots, weights);\n");
WRITE(p, " vec3 pos_d = tess_sample(_pos, weights);\n");
// Up
WRITE(p, " vec2 tess_pos_u = tess_pos - tess_next_v;\n");
WRITE(p, " spline_weight(tess_pos_u + patch_pos, knots, weights);\n");
WRITE(p, " vec3 pos_u = tess_sample(_pos, weights);\n");
WRITE(p, " vec3 du = pos_r - pos_l;\n");
WRITE(p, " vec3 dv = pos_d - pos_u;\n");
}
WRITE(p, " vec3 nrm = cross(du, dv);\n");
WRITE(p, " nrm = normalize(nrm);\n");
}
WRITE(p, " vec3 worldpos = vec4(pos.xyz, 1.0) * base.world_mtx;\n");
if (hasNormal) {
WRITE(p, " mediump vec3 worldnormal = normalize(vec4(%snrm, 0.0) * base.world_mtx);\n", flipNormalTess ? "-" : "");
WRITE(p, " vec3 worldpos = vec4(tess.pos.xyz, 1.0) * base.world_mtx;\n");
if (hasNormalTess) {
WRITE(p, " mediump vec3 worldnormal = normalize(vec4(%stess.nrm, 0.0) * base.world_mtx);\n", flipNormalTess ? "-" : "");
} else {
WRITE(p, " mediump vec3 worldnormal = vec3(0.0, 0.0, 1.0);\n");
}
@ -483,9 +406,10 @@ bool GenerateVulkanGLSLVertexShader(const VShaderID &id, char *buffer) {
const char *diffuseStr = ((matUpdate & 2) && hasColor) ? "color0.rgb" : "light.matdiffuse";
const char *specularStr = ((matUpdate & 4) && hasColor) ? "color0.rgb" : "light.matspecular.rgb";
if (doBezier || doSpline) {
ambientStr = (matUpdate & 1) && hasColor ? "col" : "base.matambientalpha";
diffuseStr = (matUpdate & 2) && hasColor ? "col.rgb" : "light.matdiffuse";
specularStr = (matUpdate & 4) && hasColor ? "col.rgb" : "light.matspecular.rgb";
// TODO: Probably, should use hasColorTess but FF4 has a problem with drawing the background.
ambientStr = (matUpdate & 1) && hasColor ? "tess.col" : "base.matambientalpha";
diffuseStr = (matUpdate & 2) && hasColor ? "tess.col.rgb" : "light.matdiffuse";
specularStr = (matUpdate & 4) && hasColor ? "tess.col.rgb" : "light.matspecular.rgb";
}
bool diffuseIsZero = true;
@ -606,7 +530,7 @@ bool GenerateVulkanGLSLVertexShader(const VShaderID &id, char *buffer) {
// Lighting doesn't affect color.
if (hasColor) {
if (doBezier || doSpline)
WRITE(p, " v_color0 = col;\n");
WRITE(p, " v_color0 = tess.col;\n");
else
WRITE(p, " v_color0 = color0;\n");
} else {
@ -627,7 +551,7 @@ bool GenerateVulkanGLSLVertexShader(const VShaderID &id, char *buffer) {
if (scaleUV) {
if (hasTexcoord) {
if (doBezier || doSpline)
WRITE(p, " v_texcoord = vec3(tex.xy * base.uvscaleoffset.xy + base.uvscaleoffset.zw, 0.0);\n");
WRITE(p, " v_texcoord = vec3(tess.tex.xy * base.uvscaleoffset.xy + base.uvscaleoffset.zw, 0.0);\n");
else
WRITE(p, " v_texcoord = vec3(texcoord.xy * base.uvscaleoffset.xy, 0.0);\n");
} else {
@ -635,10 +559,7 @@ bool GenerateVulkanGLSLVertexShader(const VShaderID &id, char *buffer) {
}
} else {
if (hasTexcoord) {
if (doBezier || doSpline)
WRITE(p, " v_texcoord = vec3(tex.xy * base.uvscaleoffset.xy + base.uvscaleoffset.zw, 0.0);\n");
else
WRITE(p, " v_texcoord = vec3(texcoord.xy * base.uvscaleoffset.xy + base.uvscaleoffset.zw, 0.0);\n");
WRITE(p, " v_texcoord = vec3(texcoord.xy * base.uvscaleoffset.xy + base.uvscaleoffset.zw, 0.0);\n");
} else {
WRITE(p, " v_texcoord = vec3(base.uvscaleoffset.zw, 0.0);\n");
}

View File

@ -80,7 +80,7 @@ bool GameSettingsScreen::UseVerticalLayout() const {
// This is needed before CheckGPUFeatures() runs.
// TODO: Remove this once the issue is fixed.
bool CheckSupportInstancedTessellationGLES() {
bool CheckSupportShaderTessellationGLES() {
#if PPSSPP_PLATFORM(UWP)
return true;
#else
@ -88,21 +88,17 @@ bool CheckSupportInstancedTessellationGLES() {
int maxVertexTextureImageUnits = gl_extensions.maxVertexTextureUnits;
bool vertexTexture = maxVertexTextureImageUnits >= 3; // At least 3 for hardware tessellation
bool canUseInstanceID = gl_extensions.EXT_draw_instanced || gl_extensions.ARB_draw_instanced;
bool canDefInstanceID = gl_extensions.IsGLES || gl_extensions.EXT_gpu_shader4 || gl_extensions.VersionGEThan(3, 1);
bool instanceRendering = gl_extensions.GLES3 || (canUseInstanceID && canDefInstanceID);
bool textureFloat = gl_extensions.ARB_texture_float || gl_extensions.OES_texture_float;
bool hasTexelFetch = gl_extensions.GLES3 || (!gl_extensions.IsGLES && gl_extensions.VersionGEThan(3, 3, 0)) || gl_extensions.EXT_gpu_shader4;
return instanceRendering && vertexTexture && textureFloat && hasTexelFetch;
return vertexTexture && textureFloat && hasTexelFetch;
#endif
}
bool DoesBackendSupportHWTess() {
switch (GetGPUBackend()) {
case GPUBackend::OPENGL:
return CheckSupportInstancedTessellationGLES();
return CheckSupportShaderTessellationGLES();
case GPUBackend::VULKAN:
case GPUBackend::DIRECT3D11:
return true;
@ -392,11 +388,10 @@ void GameSettingsScreen::CreateViews() {
}
return UI::EVENT_CONTINUE;
});
beziersChoice->SetDisabledPtr(&g_Config.bHardwareTessellation);
CheckBox *tessellationHW = graphicsSettings->Add(new CheckBox(&g_Config.bHardwareTessellation, gr->T("Hardware Tessellation")));
tessellationHW->OnClick.Add([=](EventParams &e) {
settingInfo_->Show(gr->T("HardwareTessellation Tip", "Uses hardware to make curves, always uses a fixed quality"), e.v);
settingInfo_->Show(gr->T("HardwareTessellation Tip", "Uses hardware to make curves"), e.v);
return UI::EVENT_CONTINUE;
});
tessHWEnable_ = DoesBackendSupportHWTess() && !g_Config.bSoftwareRendering && g_Config.bHardwareTransform;

View File

@ -26,6 +26,7 @@
#include "GPU/Common/GPUDebugInterface.h"
#include "GPU/Common/SplineCommon.h"
#include "GPU/GPUState.h"
#include "Common/MemoryUtil.h"
static const char preview_fs[] =
"#ifdef GL_ES\n"
@ -164,96 +165,104 @@ u32 CGEDebugger::PrimPreviewOp() {
}
static void ExpandBezier(int &count, int op, const std::vector<SimpleVertex> &simpleVerts, const std::vector<u16> &indices, std::vector<SimpleVertex> &generatedVerts, std::vector<u16> &generatedInds) {
int count_u = (op & 0x00FF) >> 0;
int count_v = (op & 0xFF00) >> 8;
using namespace Spline;
int tess_u = gstate.getPatchDivisionU();
int tess_v = gstate.getPatchDivisionV();
if (tess_u < 1) {
tess_u = 1;
}
if (tess_v < 1) {
tess_v = 1;
}
int count_u = (op >> 0) & 0xFF;
int count_v = (op >> 8) & 0xFF;
// Real hardware seems to draw nothing when given < 4 either U or V.
if (count_u < 4 || count_v < 4)
return;
// Bezier patches share less control points than spline patches. Otherwise they are pretty much the same (except bezier don't support the open/close thing)
int num_patches_u = (count_u - 1) / 3;
int num_patches_v = (count_v - 1) / 3;
int total_patches = num_patches_u * num_patches_v;
std::vector<BezierPatch> patches;
patches.resize(total_patches);
for (int patch_u = 0; patch_u < num_patches_u; patch_u++) {
for (int patch_v = 0; patch_v < num_patches_v; patch_v++) {
BezierPatch &patch = patches[patch_u + patch_v * num_patches_u];
for (int point = 0; point < 16; ++point) {
int idx = (patch_u * 3 + point % 4) + (patch_v * 3 + point / 4) * count_u;
patch.points[point] = &simpleVerts[0] + (!indices.empty() ? indices[idx] : idx);
}
patch.u_index = patch_u * 3;
patch.v_index = patch_v * 3;
patch.index = patch_v * num_patches_u + patch_u;
patch.primType = gstate.getPatchPrimitiveType();
patch.computeNormals = false;
patch.patchFacing = false;
}
}
BezierSurface surface;
surface.num_points_u = count_u;
surface.num_points_v = count_v;
surface.tess_u = gstate.getPatchDivisionU();
surface.tess_v = gstate.getPatchDivisionV();
surface.num_patches_u = (count_u - 1) / 3;
surface.num_patches_v = (count_v - 1) / 3;
surface.primType = gstate.getPatchPrimitiveType();
surface.patchFacing = false;
generatedVerts.resize((tess_u + 1) * (tess_v + 1) * total_patches);
generatedInds.resize(tess_u * tess_v * 6 * total_patches);
int num_points = count_u * count_v;
// Make an array of pointers to the control points, to get rid of indices.
std::vector<const SimpleVertex *> points(num_points);
for (int idx = 0; idx < num_points; idx++)
points[idx] = simpleVerts.data() + (!indices.empty() ? indices[idx] : idx);
count = 0;
u8 *dest = (u8 *)&generatedVerts[0];
u16 *inds = &generatedInds[0];
for (int patch_idx = 0; patch_idx < total_patches; ++patch_idx) {
const BezierPatch &patch = patches[patch_idx];
TessellateBezierPatch(dest, inds, count, tess_u, tess_v, patch, gstate.vertType);
}
int total_patches = surface.num_patches_u * surface.num_patches_v;
generatedVerts.resize((surface.tess_u + 1) * (surface.tess_v + 1) * total_patches);
generatedInds.resize(surface.tess_u * surface.tess_v * 6 * total_patches);
OutputBuffers output;
output.vertices = generatedVerts.data();
output.indices = generatedInds.data();
output.count = 0;
ControlPoints cpoints;
cpoints.pos = (Vec3f *)AllocateAlignedMemory(sizeof(Vec3f) * num_points, 16);
cpoints.tex = (Vec2f *)AllocateAlignedMemory(sizeof(Vec2f) * num_points, 16);
cpoints.col = (Vec4f *)AllocateAlignedMemory(sizeof(Vec4f) * num_points, 16);
cpoints.Convert(points.data(), num_points);
surface.Init(generatedVerts.size());
SoftwareTessellation(output, surface, gstate.vertType, cpoints);
count = output.count;
FreeAlignedMemory(cpoints.pos);
FreeAlignedMemory(cpoints.tex);
FreeAlignedMemory(cpoints.col);
}
static void ExpandSpline(int &count, int op, const std::vector<SimpleVertex> &simpleVerts, const std::vector<u16> &indices, std::vector<SimpleVertex> &generatedVerts, std::vector<u16> &generatedInds) {
SplinePatchLocal patch;
patch.computeNormals = false;
patch.primType = gstate.getPatchPrimitiveType();
patch.patchFacing = false;
patch.count_u = (op & 0x00FF) >> 0;
patch.count_v = (op & 0xFF00) >> 8;
patch.type_u = (op >> 16) & 0x3;
patch.type_v = (op >> 18) & 0x3;
patch.tess_u = gstate.getPatchDivisionU();
patch.tess_v = gstate.getPatchDivisionV();
if (patch.tess_u < 1) {
patch.tess_u = 1;
}
if (patch.tess_v < 1) {
patch.tess_v = 1;
}
using namespace Spline;
int count_u = (op >> 0) & 0xFF;
int count_v = (op >> 8) & 0xFF;
// Real hardware seems to draw nothing when given < 4 either U or V.
if (patch.count_u < 4 || patch.count_v < 4) {
if (count_u < 4 || count_v < 4)
return;
}
std::vector<const SimpleVertex *> points;
points.resize(patch.count_u * patch.count_v);
SplineSurface surface;
surface.num_points_u = count_u;
surface.num_points_v = count_v;
surface.tess_u = gstate.getPatchDivisionU();
surface.tess_v = gstate.getPatchDivisionV();
surface.type_u = (op >> 16) & 0x3;
surface.type_v = (op >> 18) & 0x3;
surface.num_patches_u = count_u - 3;
surface.num_patches_v = count_v - 3;
surface.primType = gstate.getPatchPrimitiveType();
surface.patchFacing = false;
int num_points = count_u * count_v;
// Make an array of pointers to the control points, to get rid of indices.
for (int idx = 0; idx < patch.count_u * patch.count_v; idx++) {
points[idx] = &simpleVerts[0] + (!indices.empty() ? indices[idx] : idx);
}
patch.points = &points[0];
std::vector<const SimpleVertex *> points(num_points);
for (int idx = 0; idx < num_points; idx++)
points[idx] = simpleVerts.data() + (!indices.empty() ? indices[idx] : idx);
int patch_div_s = (patch.count_u - 3) * patch.tess_u;
int patch_div_t = (patch.count_v - 3) * patch.tess_v;
int maxVertexCount = (patch_div_s + 1) * (patch_div_t + 1);
generatedVerts.resize(maxVertexCount);
int patch_div_s = surface.num_patches_u * surface.tess_u;
int patch_div_t = surface.num_patches_v * surface.tess_v;
generatedVerts.resize((patch_div_s + 1) * (patch_div_t + 1));
generatedInds.resize(patch_div_s * patch_div_t * 6);
count = 0;
u8 *dest = (u8 *)&generatedVerts[0];
TessellateSplinePatch(dest, &generatedInds[0], count, patch, gstate.vertType, maxVertexCount);
OutputBuffers output;
output.vertices = generatedVerts.data();
output.indices = generatedInds.data();
output.count = 0;
ControlPoints cpoints;
cpoints.pos = (Vec3f *)AllocateAlignedMemory(sizeof(Vec3f) * num_points, 16);
cpoints.tex = (Vec2f *)AllocateAlignedMemory(sizeof(Vec2f) * num_points, 16);
cpoints.col = (Vec4f *)AllocateAlignedMemory(sizeof(Vec4f) * num_points, 16);
cpoints.Convert(points.data(), num_points);
surface.Init(generatedVerts.size());
SoftwareTessellation(output, surface, gstate.vertType, cpoints);
count = output.count;
FreeAlignedMemory(cpoints.pos);
FreeAlignedMemory(cpoints.tex);
FreeAlignedMemory(cpoints.col);
}
void CGEDebugger::UpdatePrimPreview(u32 op, int which) {

View File

@ -306,14 +306,14 @@ void GLQueueRunner::RunInitSteps(const std::vector<GLRInitStep> &steps, bool ski
glBindTexture(tex->target, tex->texture);
boundTexture = tex->texture;
}
if (!step.texture_image.data)
if (!step.texture_image.data && step.texture_image.allocType != GLRAllocType::NONE)
Crash();
// For things to show in RenderDoc, need to split into glTexImage2D(..., nullptr) and glTexSubImage.
glTexImage2D(tex->target, step.texture_image.level, step.texture_image.internalFormat, step.texture_image.width, step.texture_image.height, 0, step.texture_image.format, step.texture_image.type, step.texture_image.data);
allocatedTextures = true;
if (step.texture_image.allocType == GLRAllocType::ALIGNED) {
FreeAlignedMemory(step.texture_image.data);
} else {
} else if (step.texture_image.allocType == GLRAllocType::NEW) {
delete[] step.texture_image.data;
}
CHECK_GL_ERROR_IF_DEBUG();
@ -490,7 +490,19 @@ void GLQueueRunner::RunSteps(const std::vector<GLRStep *> &steps, bool skipGLCal
const GLRStep &step = *steps[i];
switch (step.stepType) {
case GLRStepType::RENDER:
// TODO: With #11425 there'll be a case where we should really free spline data here.
for (const auto &c : step.commands) {
switch (c.cmd) {
case GLRRenderCommand::TEXTURE_SUBIMAGE:
if (c.texture_subimage.data) {
if (c.texture_subimage.allocType == GLRAllocType::ALIGNED) {
FreeAlignedMemory(c.texture_subimage.data);
} else if (c.texture_subimage.allocType == GLRAllocType::NEW) {
delete[] c.texture_subimage.data;
}
}
break;
}
}
break;
}
delete steps[i];
@ -1024,6 +1036,22 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step) {
}
break;
}
case GLRRenderCommand::TEXTURE_SUBIMAGE:
{
GLRTexture *tex = c.texture_subimage.texture;
// TODO: Need bind?
if (!c.texture_subimage.data)
Crash();
// For things to show in RenderDoc, need to split into glTexImage2D(..., nullptr) and glTexSubImage.
glTexSubImage2D(tex->target, c.texture_subimage.level, c.texture_subimage.x, c.texture_subimage.y, c.texture_subimage.width, c.texture_subimage.height, c.texture_subimage.format, c.texture_subimage.type, c.texture_subimage.data);
if (c.texture_subimage.allocType == GLRAllocType::ALIGNED) {
FreeAlignedMemory(c.texture_subimage.data);
} else if (c.texture_subimage.allocType == GLRAllocType::NEW) {
delete[] c.texture_subimage.data;
}
CHECK_GL_ERROR_IF_DEBUG();
break;
}
case GLRRenderCommand::RASTER:
if (c.raster.cullEnable) {
if (!cullEnabled) {

View File

@ -20,6 +20,7 @@ struct GLOffset2D {
};
// Describes how the queue runner releases a data buffer handed to it with a texture upload.
enum class GLRAllocType {
// The runner does not free the buffer.
NONE,
// The runner frees the buffer with delete[].
NEW,
// The runner frees the buffer with FreeAlignedMemory().
ALIGNED,
};
@ -57,6 +58,7 @@ enum class GLRRenderCommand : uint8_t {
DRAW,
DRAW_INDEXED,
PUSH_CONSTANTS,
TEXTURE_SUBIMAGE,
};
// TODO: Bloated since the biggest struct decides the size. Will need something more efficient (separate structs with shared
@ -138,6 +140,18 @@ struct GLRRenderData {
int slot;
GLRTexture *texture;
} texture;
// Payload for GLRRenderCommand::TEXTURE_SUBIMAGE: a glTexSubImage2D upload into an existing texture.
struct {
GLRTexture *texture;
GLenum format;
GLenum type;
int level;
// Destination rectangle within the given mip level.
int x;
int y;
int width;
int height;
// Selects how the queue runner releases `data` once the command has been processed.
GLRAllocType allocType;
uint8_t *data; // owned by the command; freed with FreeAlignedMemory() (ALIGNED) or delete[] (NEW)
} texture_subimage;
struct {
int slot;
GLRFramebuffer *framebuffer;

View File

@ -530,6 +530,22 @@ public:
initSteps_.push_back(step);
}
// Queues a TEXTURE_SUBIMAGE command on the current render step: `data` is uploaded into the
// rectangle (x, y, width, height) of mip `level` of `texture`, using the given GL format/type.
// Must be called while a render step is active (checked by the debug assert).
// `allocType` is recorded with the command so the queue runner knows how to release `data`.
void TextureSubImage(GLRTexture *texture, int level, int x, int y, int width, int height, GLenum format, GLenum type, uint8_t *data, GLRAllocType allocType = GLRAllocType::NEW) {
	_dbg_assert_(G3D, curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);

	GLRRenderData cmd{ GLRRenderCommand::TEXTURE_SUBIMAGE };
	auto &sub = cmd.texture_subimage;

	// Destination: which texture, which mip level, which rectangle.
	sub.texture = texture;
	sub.level = level;
	sub.x = x;
	sub.y = y;
	sub.width = width;
	sub.height = height;

	// Source: pixel layout, the buffer itself, and how it must be released.
	sub.format = format;
	sub.type = type;
	sub.data = data;
	sub.allocType = allocType;

	curRenderStep_->commands.push_back(cmd);
}
void FinalizeTexture(GLRTexture *texture, int maxLevels, bool genMips) {
GLRInitStep step{ GLRInitStepType::TEXTURE_FINALIZE };
step.texture_finalize.texture = texture;