mirror of
https://github.com/hrydgard/ppsspp.git
synced 2024-10-07 19:03:29 +00:00
Merge pull request #11425 from xebra/refactor_spline_bezier
[Refactoring] Improve spline/bezier.
This commit is contained in:
commit
22c066515e
@ -34,7 +34,6 @@ enum {
|
||||
};
|
||||
|
||||
DrawEngineCommon::DrawEngineCommon() : decoderMap_(16) {
|
||||
quadIndices_ = new u16[6 * QUAD_INDICES_MAX];
|
||||
decJitCache_ = new VertexDecoderJitCache();
|
||||
transformed = (TransformedVertex *)AllocateMemoryPages(TRANSFORMED_VERTEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
|
||||
transformedExpanded = (TransformedVertex *)AllocateMemoryPages(3 * TRANSFORMED_VERTEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
|
||||
@ -43,11 +42,11 @@ DrawEngineCommon::DrawEngineCommon() : decoderMap_(16) {
|
||||
DrawEngineCommon::~DrawEngineCommon() {
|
||||
FreeMemoryPages(transformed, TRANSFORMED_VERTEX_BUFFER_SIZE);
|
||||
FreeMemoryPages(transformedExpanded, 3 * TRANSFORMED_VERTEX_BUFFER_SIZE);
|
||||
delete[] quadIndices_;
|
||||
delete decJitCache_;
|
||||
decoderMap_.Iterate([&](const uint32_t vtype, VertexDecoder *decoder) {
|
||||
delete decoder;
|
||||
});
|
||||
ClearSplineBezierWeights();
|
||||
}
|
||||
|
||||
VertexDecoder *DrawEngineCommon::GetVertexDecoder(u32 vtype) {
|
||||
@ -739,3 +738,25 @@ void DrawEngineCommon::SubmitPrim(void *verts, void *inds, GEPrimitiveType prim,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void TessellationDataTransfer::CopyControlPoints(float *pos, float *tex, float *col, int posStride, int texStride, int colStride, const SimpleVertex *const *points, int size, u32 vertType) {
|
||||
bool hasColor = (vertType & GE_VTYPE_COL_MASK) != 0;
|
||||
bool hasTexCoord = (vertType & GE_VTYPE_TC_MASK) != 0;
|
||||
|
||||
for (int i = 0; i < size; ++i) {
|
||||
memcpy(pos, points[i]->pos.AsArray(), 3 * sizeof(float));
|
||||
pos += posStride;
|
||||
}
|
||||
if (hasTexCoord) {
|
||||
for (int i = 0; i < size; ++i) {
|
||||
memcpy(tex, points[i]->uv, 2 * sizeof(float));
|
||||
tex += texStride;
|
||||
}
|
||||
}
|
||||
if (hasColor) {
|
||||
for (int i = 0; i < size; ++i) {
|
||||
memcpy(col, Vec4f::FromRGBA(points[i]->color_32).AsArray(), 4 * sizeof(float));
|
||||
col += colStride;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -34,7 +34,6 @@ enum {
|
||||
VERTEX_BUFFER_MAX = 65536,
|
||||
DECODED_VERTEX_BUFFER_SIZE = VERTEX_BUFFER_MAX * 64,
|
||||
DECODED_INDEX_BUFFER_SIZE = VERTEX_BUFFER_MAX * 16,
|
||||
SPLINE_BUFFER_SIZE = VERTEX_BUFFER_MAX * 26, // At least, this buffer needs greater than 1679616 bytes for Mist Dragon morphing in FF4CC.
|
||||
};
|
||||
|
||||
// Avoiding the full include of TextureDecoder.h.
|
||||
@ -50,6 +49,15 @@ inline uint32_t GetVertTypeID(uint32_t vertType, int uvGenMode) {
|
||||
return (vertType & 0xFFFFFF) | (uvGenMode << 24);
|
||||
}
|
||||
|
||||
struct SimpleVertex;
|
||||
namespace Spline { struct Weight2D; }
|
||||
|
||||
class TessellationDataTransfer {
|
||||
public:
|
||||
void CopyControlPoints(float *pos, float *tex, float *col, int posStride, int texStride, int colStride, const SimpleVertex *const *points, int size, u32 vertType);
|
||||
virtual void SendDataToShader(const SimpleVertex *const *points, int size_u, int size_v, u32 vertType, const Spline::Weight2D &weights) = 0;
|
||||
};
|
||||
|
||||
class DrawEngineCommon {
|
||||
public:
|
||||
DrawEngineCommon();
|
||||
@ -75,6 +83,7 @@ public:
|
||||
void SubmitPrim(void *verts, void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int cullMode, int *bytesRead);
|
||||
void SubmitSpline(const void *control_points, const void *indices, int tess_u, int tess_v, int count_u, int count_v, int type_u, int type_v, GEPatchPrimType prim_type, bool computeNormals, bool patchFacing, u32 vertType, int *bytesRead);
|
||||
void SubmitBezier(const void *control_points, const void *indices, int tess_u, int tess_v, int count_u, int count_v, GEPatchPrimType prim_type, bool computeNormals, bool patchFacing, u32 vertType, int *bytesRead);
|
||||
void ClearSplineBezierWeights();
|
||||
|
||||
std::vector<std::string> DebugGetVertexLoaderIDs();
|
||||
std::string DebugGetVertexLoaderString(std::string id, DebugShaderStringType stringType);
|
||||
@ -160,31 +169,10 @@ protected:
|
||||
int decodedVerts_ = 0;
|
||||
GEPrimitiveType prevPrim_ = GE_PRIM_INVALID;
|
||||
|
||||
// Fixed index buffer for easy quad generation from spline/bezier
|
||||
u16 *quadIndices_ = nullptr;
|
||||
|
||||
// Shader blending state
|
||||
bool fboTexNeedBind_ = false;
|
||||
bool fboTexBound_ = false;
|
||||
|
||||
// Hardware tessellation
|
||||
int numPatches;
|
||||
class TessellationDataTransfer {
|
||||
protected:
|
||||
// TODO: These aren't used by all backends.
|
||||
int prevSize;
|
||||
int prevSizeTex;
|
||||
int prevSizeCol;
|
||||
public:
|
||||
virtual ~TessellationDataTransfer() {}
|
||||
// Send spline/bezier's control points to vertex shader through floating point texture.
|
||||
virtual void PrepareBuffers(float *&pos, float *&tex, float *&col, int &posStride, int &texStride, int &colStride, int size, bool hasColor, bool hasTexCoords) {
|
||||
posStride = 4;
|
||||
texStride = 4;
|
||||
colStride = 4;
|
||||
}
|
||||
virtual void SendDataToShader(const float *pos, const float *tex, const float *col, int size, bool hasColor, bool hasTexCoords) = 0;
|
||||
virtual void EndFrame() {}
|
||||
};
|
||||
TessellationDataTransfer *tessDataTransfer;
|
||||
};
|
||||
|
@ -53,6 +53,7 @@ std::string VertexShaderDesc(const ShaderID &id) {
|
||||
if (id.Bit(VS_BIT_SPLINE)) desc << "Spline ";
|
||||
if (id.Bit(VS_BIT_HAS_COLOR_TESS)) desc << "TessC ";
|
||||
if (id.Bit(VS_BIT_HAS_TEXCOORD_TESS)) desc << "TessT ";
|
||||
if (id.Bit(VS_BIT_HAS_NORMAL_TESS)) desc << "TessN ";
|
||||
if (id.Bit(VS_BIT_NORM_REVERSE_TESS)) desc << "TessRevN ";
|
||||
|
||||
return desc.str();
|
||||
@ -73,6 +74,7 @@ void ComputeVertexShaderID(ShaderID *id_out, u32 vertType, bool useHWTransform)
|
||||
bool doSpline = gstate_c.spline;
|
||||
bool hasColorTess = (gstate.vertType & GE_VTYPE_COL_MASK) != 0 && (doBezier || doSpline);
|
||||
bool hasTexcoordTess = (gstate.vertType & GE_VTYPE_TC_MASK) != 0 && (doBezier || doSpline);
|
||||
bool hasNormalTess = (gstate.vertType & GE_VTYPE_NRM_MASK) != 0 && (doBezier || doSpline);
|
||||
|
||||
bool enableFog = gstate.isFogEnabled() && !isModeThrough && !gstate.isModeClear();
|
||||
bool lmode = gstate.isUsingSecondaryColor() && gstate.isLightingEnabled() && !isModeThrough;
|
||||
@ -139,6 +141,7 @@ void ComputeVertexShaderID(ShaderID *id_out, u32 vertType, bool useHWTransform)
|
||||
id.SetBit(VS_BIT_SPLINE, doSpline);
|
||||
id.SetBit(VS_BIT_HAS_COLOR_TESS, hasColorTess);
|
||||
id.SetBit(VS_BIT_HAS_TEXCOORD_TESS, hasTexcoordTess);
|
||||
id.SetBit(VS_BIT_HAS_NORMAL_TESS, hasNormalTess);
|
||||
id.SetBit(VS_BIT_NORM_REVERSE_TESS, gstate.isPatchNormalsReversed());
|
||||
}
|
||||
}
|
||||
|
@ -24,7 +24,7 @@ enum {
|
||||
VS_BIT_HAS_COLOR_TESS = 12, // 1 bit
|
||||
VS_BIT_HAS_TEXCOORD_TESS = 13, // 1 bit
|
||||
VS_BIT_NORM_REVERSE_TESS = 14, // 1 bit
|
||||
// 15 is free.
|
||||
VS_BIT_HAS_NORMAL_TESS = 15, // 1 bit
|
||||
VS_BIT_UVGEN_MODE = 16,
|
||||
VS_BIT_UVPROJ_MODE = 18, // 2, can overlap with LS0
|
||||
VS_BIT_LS0 = 18, // 2
|
||||
|
@ -240,7 +240,7 @@ void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipView
|
||||
}
|
||||
|
||||
if (dirtyUniforms & DIRTY_BEZIERSPLINE) {
|
||||
ub->spline_counts = BytesToUint32(gstate_c.spline_count_u, gstate_c.spline_count_v, gstate_c.spline_type_u, gstate_c.spline_type_v);
|
||||
ub->spline_counts = gstate_c.spline_num_points_u;
|
||||
}
|
||||
|
||||
if (dirtyUniforms & DIRTY_DEPAL) {
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -16,11 +16,15 @@
|
||||
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
||||
|
||||
#pragma once
|
||||
#include <unordered_map>
|
||||
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Common/Swap.h"
|
||||
#include "GPU/Math3D.h"
|
||||
#include "GPU/ge_constants.h"
|
||||
#include "Core/Config.h"
|
||||
|
||||
#define HALF_CEIL(x) (((x) + 1) / 2) // Integer ceil = (int)ceil((float)x / 2.0f). Fully parenthesized so it is safe inside larger expressions.
|
||||
|
||||
// PSP compatible format so we can use the end of the pipeline in beziers etc
|
||||
struct SimpleVertex {
|
||||
@ -33,32 +37,11 @@ struct SimpleVertex {
|
||||
Vec3Packedf pos;
|
||||
};
|
||||
|
||||
// We decode all vertices into a common format for easy interpolation and stuff.
|
||||
// Not fast but can be optimized later.
|
||||
struct BezierPatch {
|
||||
const SimpleVertex *points[16];
|
||||
class SimpleBufferManager;
|
||||
|
||||
// These are used to generate UVs.
|
||||
int u_index, v_index;
|
||||
namespace Spline {
|
||||
|
||||
int index;
|
||||
GEPatchPrimType primType;
|
||||
bool computeNormals;
|
||||
bool patchFacing;
|
||||
};
|
||||
|
||||
struct SplinePatchLocal {
|
||||
const SimpleVertex **points;
|
||||
int tess_u;
|
||||
int tess_v;
|
||||
int count_u;
|
||||
int count_v;
|
||||
int type_u;
|
||||
int type_v;
|
||||
bool computeNormals;
|
||||
bool patchFacing;
|
||||
GEPatchPrimType primType;
|
||||
};
|
||||
void BuildIndex(u16 *indices, int &count, int num_u, int num_v, GEPatchPrimType prim_type, int total = 0);
|
||||
|
||||
enum SplineQuality {
|
||||
LOW_QUALITY = 0,
|
||||
@ -66,6 +49,207 @@ enum SplineQuality {
|
||||
HIGH_QUALITY = 2,
|
||||
};
|
||||
|
||||
class Bezier3DWeight;
|
||||
class Spline3DWeight;
|
||||
|
||||
// We decode all vertices into a common format for easy interpolation and stuff.
|
||||
// Not fast but can be optimized later.
|
||||
|
||||
struct SurfaceInfo {
|
||||
int tess_u, tess_v;
|
||||
int num_points_u, num_points_v;
|
||||
int num_patches_u, num_patches_v;
|
||||
int type_u, type_v;
|
||||
GEPatchPrimType primType;
|
||||
bool patchFacing;
|
||||
|
||||
void Init() {
|
||||
// If specified as 0, uses 1.
|
||||
if (tess_u < 1) tess_u = 1;
|
||||
if (tess_v < 1) tess_v = 1;
|
||||
|
||||
switch (g_Config.iSplineBezierQuality) {
|
||||
case LOW_QUALITY:
|
||||
tess_u = 2;
|
||||
tess_v = 2;
|
||||
break;
|
||||
case MEDIUM_QUALITY:
|
||||
// Don't cut below 2, though.
|
||||
if (tess_u > 2) tess_u = HALF_CEIL(tess_u);
|
||||
if (tess_v > 2) tess_v = HALF_CEIL(tess_v);
|
||||
break;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct BezierSurface : public SurfaceInfo {
|
||||
using WeightType = Bezier3DWeight;
|
||||
|
||||
int num_verts_per_patch;
|
||||
|
||||
void Init(int maxVertices) {
|
||||
SurfaceInfo::Init();
|
||||
// Downsample until it fits, in case crazy tessellation factors are sent.
|
||||
while ((tess_u + 1) * (tess_v + 1) * num_patches_u * num_patches_v > maxVertices) {
|
||||
tess_u--;
|
||||
tess_v--;
|
||||
}
|
||||
num_verts_per_patch = (tess_u + 1) * (tess_v + 1);
|
||||
}
|
||||
|
||||
int GetTessStart(int patch) const { return 0; }
|
||||
|
||||
int GetPointIndex(int patch_u, int patch_v) const { return patch_v * 3 * num_points_u + patch_u * 3; }
|
||||
|
||||
int GetIndexU(int patch_u, int tile_u) const { return tile_u; }
|
||||
int GetIndexV(int patch_v, int tile_v) const { return tile_v; }
|
||||
|
||||
int GetIndex(int index_u, int index_v, int patch_u, int patch_v) const {
|
||||
int patch_index = patch_v * num_patches_u + patch_u;
|
||||
return index_v * (tess_u + 1) + index_u + num_verts_per_patch * patch_index;
|
||||
}
|
||||
|
||||
void BuildIndex(u16 *indices, int &count) const {
|
||||
for (int patch_u = 0; patch_u < num_patches_u; ++patch_u) {
|
||||
for (int patch_v = 0; patch_v < num_patches_v; ++patch_v) {
|
||||
int patch_index = patch_v * num_patches_u + patch_u;
|
||||
int total = patch_index * num_verts_per_patch;
|
||||
Spline::BuildIndex(indices + count, count, tess_u, tess_v, primType, total);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct SplineSurface : public SurfaceInfo {
|
||||
using WeightType = Spline3DWeight;
|
||||
|
||||
int num_vertices_u;
|
||||
|
||||
void Init(int maxVertices) {
|
||||
SurfaceInfo::Init();
|
||||
// Downsample until it fits, in case crazy tessellation factors are sent.
|
||||
while ((num_patches_u * tess_u + 1) * (num_patches_v * tess_v + 1) > maxVertices) {
|
||||
tess_u--;
|
||||
tess_v--;
|
||||
}
|
||||
num_vertices_u = num_patches_u * tess_u + 1;
|
||||
}
|
||||
|
||||
int GetTessStart(int patch) const { return (patch == 0) ? 0 : 1; }
|
||||
|
||||
int GetPointIndex(int patch_u, int patch_v) const { return patch_v * num_points_u + patch_u; }
|
||||
|
||||
int GetIndexU(int patch_u, int tile_u) const { return patch_u * tess_u + tile_u; }
|
||||
int GetIndexV(int patch_v, int tile_v) const { return patch_v * tess_v + tile_v; }
|
||||
|
||||
int GetIndex(int index_u, int index_v, int patch_u, int patch_v) const {
|
||||
return index_v * num_vertices_u + index_u;
|
||||
}
|
||||
|
||||
void BuildIndex(u16 *indices, int &count) const {
|
||||
Spline::BuildIndex(indices, count, num_patches_u * tess_u, num_patches_v * tess_v, primType);
|
||||
}
|
||||
};
|
||||
|
||||
struct Weight {
|
||||
float basis[4], deriv[4];
|
||||
};
|
||||
|
||||
template<class T>
|
||||
class WeightCache : public T {
|
||||
private:
|
||||
std::unordered_map<u32, Weight*> weightsCache;
|
||||
public:
|
||||
Weight* operator [] (u32 key) {
|
||||
Weight *&weights = weightsCache[key];
|
||||
if (!weights)
|
||||
weights = T::CalcWeightsAll(key);
|
||||
return weights;
|
||||
}
|
||||
|
||||
void Clear() {
|
||||
for (auto it : weightsCache)
|
||||
delete[] it.second;
|
||||
weightsCache.clear();
|
||||
}
|
||||
};
|
||||
|
||||
struct Weight2D {
|
||||
const Weight *u, *v;
|
||||
int size_u, size_v;
|
||||
|
||||
template<class T>
|
||||
Weight2D(WeightCache<T> &cache, u32 key_u, u32 key_v) {
|
||||
u = cache[key_u];
|
||||
v = (key_u != key_v) ? cache[key_v] : u; // Use same weights if u == v
|
||||
}
|
||||
};
|
||||
|
||||
struct ControlPoints {
|
||||
Vec3f *pos;
|
||||
Vec2f *tex;
|
||||
Vec4f *col;
|
||||
u32_le defcolor;
|
||||
|
||||
ControlPoints() {}
|
||||
ControlPoints(const SimpleVertex *const *points, int size, SimpleBufferManager &managedBuf);
|
||||
void Convert(const SimpleVertex *const *points, int size);
|
||||
};
|
||||
|
||||
struct OutputBuffers {
|
||||
SimpleVertex *vertices;
|
||||
u16 *indices;
|
||||
int count;
|
||||
};
|
||||
|
||||
template<class Surface>
|
||||
void SoftwareTessellation(OutputBuffers &output, const Surface &surface, u32 origVertType, const ControlPoints &points);
|
||||
|
||||
} // namespace Spline
|
||||
|
||||
bool CanUseHardwareTessellation(GEPatchPrimType prim);
|
||||
void TessellateSplinePatch(u8 *&dest, u16 *indices, int &count, const SplinePatchLocal &spatch, u32 origVertType, int maxVertices);
|
||||
void TessellateBezierPatch(u8 *&dest, u16 *&indices, int &count, int tess_u, int tess_v, const BezierPatch &patch, u32 origVertType);
|
||||
|
||||
// Define function object for TemplateParameterDispatcher
|
||||
#define TEMPLATE_PARAMETER_DISPATCHER_FUNCTION(NAME, FUNCNAME, FUNCTYPE) \
|
||||
struct NAME { \
|
||||
template<bool ...Params> \
|
||||
static FUNCTYPE GetFunc() { \
|
||||
return &FUNCNAME<Params...>; \
|
||||
} \
|
||||
};
|
||||
|
||||
// Maps NumParams runtime booleans onto the matching instantiation of a
// function template with NumParams bool template parameters.
// Dispatcher must expose `template<bool...> static Func GetFunc()`.
template<typename Func, int NumParams, class Dispatcher>
class TemplateParameterDispatcher {

	/* Walks every true/false combination and stores the corresponding function into the table. */
	template<int Remaining, int Slot = 0, bool ...Flags>
	struct Initializer {
		static void Init(Func table[]) {
			// Each step prepends one flag and shifts it into the slot number,
			// so the final Flags[i] corresponds to bit i of Slot.
			Initializer<Remaining - 1, (Slot << 1) + 1, true, Flags...>::Init(table); // true
			Initializer<Remaining - 1, (Slot << 1) + 0, false, Flags...>::Init(table); // false
		}
	};
	/* Recursion end: all flags are chosen, store the function for this slot. */
	template<int Slot, bool ...Flags>
	struct Initializer<0, Slot, Flags...> {
		static void Init(Func table[]) {
			table[Slot] = Dispatcher::template GetFunc<Flags...>(); // Resolve the nested dependent name as template function.
		}
	};

private:
	Func funcs[1 << NumParams]; /* One entry per flag combination */
public:
	TemplateParameterDispatcher() {
		Initializer<NumParams>::Init(funcs);
	}

	// Returns the function whose template arguments equal params[0..NumParams-1].
	Func GetFunc(const bool params[]) const {
		/* params[i] contributes bit i of the table slot */
		int slot = 0;
		for (int bit = 0; bit < NumParams; ++bit) {
			if (params[bit])
				slot |= 1 << bit;
		}
		return funcs[slot];
	}
};
|
||||
|
@ -102,7 +102,7 @@ public:
|
||||
: indices(indices), indexType(vertType & GE_VTYPE_IDX_MASK) {
|
||||
}
|
||||
|
||||
inline u32 convert(u32 index) const {
|
||||
u32 operator() (u32 index) const {
|
||||
switch (indexType) {
|
||||
case GE_VTYPE_IDX_8BIT:
|
||||
return indices8[index];
|
||||
|
@ -89,7 +89,6 @@ DrawEngineD3D11::DrawEngineD3D11(Draw::DrawContext *draw, ID3D11Device *device,
|
||||
// All this is a LOT of memory, need to see if we can cut down somehow.
|
||||
decoded = (u8 *)AllocateMemoryPages(DECODED_VERTEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
|
||||
decIndex = (u16 *)AllocateMemoryPages(DECODED_INDEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
|
||||
splineBuffer = (u8 *)AllocateMemoryPages(SPLINE_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
|
||||
|
||||
indexGen.Setup(decIndex);
|
||||
|
||||
@ -104,14 +103,14 @@ DrawEngineD3D11::~DrawEngineD3D11() {
|
||||
DestroyDeviceObjects();
|
||||
FreeMemoryPages(decoded, DECODED_VERTEX_BUFFER_SIZE);
|
||||
FreeMemoryPages(decIndex, DECODED_INDEX_BUFFER_SIZE);
|
||||
FreeMemoryPages(splineBuffer, SPLINE_BUFFER_SIZE);
|
||||
}
|
||||
|
||||
void DrawEngineD3D11::InitDeviceObjects() {
|
||||
pushVerts_ = new PushBufferD3D11(device_, VERTEX_PUSH_SIZE, D3D11_BIND_VERTEX_BUFFER);
|
||||
pushInds_ = new PushBufferD3D11(device_, INDEX_PUSH_SIZE, D3D11_BIND_INDEX_BUFFER);
|
||||
|
||||
tessDataTransfer = new TessellationDataTransferD3D11(context_, device_);
|
||||
tessDataTransferD3D11 = new TessellationDataTransferD3D11(context_, device_);
|
||||
tessDataTransfer = tessDataTransferD3D11;
|
||||
}
|
||||
|
||||
void DrawEngineD3D11::ClearTrackedVertexArrays() {
|
||||
@ -137,7 +136,7 @@ void DrawEngineD3D11::Resized() {
|
||||
void DrawEngineD3D11::DestroyDeviceObjects() {
|
||||
ClearTrackedVertexArrays();
|
||||
ClearInputLayoutMap();
|
||||
delete tessDataTransfer;
|
||||
delete tessDataTransferD3D11;
|
||||
delete pushVerts_;
|
||||
delete pushInds_;
|
||||
depthStencilCache_.Iterate([&](const uint64_t &key, ID3D11DepthStencilState *ds) {
|
||||
@ -539,10 +538,7 @@ rotateVBO:
|
||||
memcpy(iptr, decIndex, iSize);
|
||||
pushInds_->EndPush(context_);
|
||||
context_->IASetIndexBuffer(pushInds_->Buf(), DXGI_FORMAT_R16_UINT, iOffset);
|
||||
if (tess)
|
||||
context_->DrawIndexedInstanced(vertexCount, numPatches, 0, 0, 0);
|
||||
else
|
||||
context_->DrawIndexed(vertexCount, 0, 0);
|
||||
context_->DrawIndexed(vertexCount, 0, 0);
|
||||
} else {
|
||||
context_->Draw(vertexCount, 0);
|
||||
}
|
||||
@ -551,10 +547,7 @@ rotateVBO:
|
||||
context_->IASetVertexBuffers(0, 1, &vb_, &stride, &offset);
|
||||
if (useElements) {
|
||||
context_->IASetIndexBuffer(ib_, DXGI_FORMAT_R16_UINT, 0);
|
||||
if (tess)
|
||||
context_->DrawIndexedInstanced(vertexCount, numPatches, 0, 0, 0);
|
||||
else
|
||||
context_->DrawIndexed(vertexCount, 0, 0);
|
||||
context_->DrawIndexed(vertexCount, 0, 0);
|
||||
} else {
|
||||
context_->Draw(vertexCount, 0);
|
||||
}
|
||||
@ -692,38 +685,85 @@ rotateVBO:
|
||||
GPUDebug::NotifyDraw();
|
||||
}
|
||||
|
||||
void DrawEngineD3D11::TessellationDataTransferD3D11::PrepareBuffers(float *&pos, float *&tex, float *&col, int &posStride, int &texStride, int &colStride, int size, bool hasColor, bool hasTexCoords) {
|
||||
TessellationDataTransferD3D11::TessellationDataTransferD3D11(ID3D11DeviceContext *context, ID3D11Device *device)
|
||||
: context_(context), device_(device) {
|
||||
desc.Usage = D3D11_USAGE_DYNAMIC;
|
||||
desc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
|
||||
desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
|
||||
desc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;
|
||||
}
|
||||
|
||||
// Releases whichever of the three buffers and shader resource views were created.
TessellationDataTransferD3D11::~TessellationDataTransferD3D11() {
	int slot = 0;
	while (slot < 3) {
		ID3D11Buffer *buffer = buf[slot];
		if (buffer != nullptr)
			buffer->Release();

		ID3D11ShaderResourceView *srv = view[slot];
		if (srv != nullptr)
			srv->Release();

		++slot;
	}
}
|
||||
|
||||
void TessellationDataTransferD3D11::SendDataToShader(const SimpleVertex *const *points, int size_u, int size_v, u32 vertType, const Spline::Weight2D &weights) {
|
||||
struct TessData {
|
||||
float pos[3]; float pad1;
|
||||
float uv[2]; float pad2[2];
|
||||
float color[4];
|
||||
};
|
||||
|
||||
int size = size_u * size_v;
|
||||
|
||||
if (prevSize < size) {
|
||||
prevSize = size;
|
||||
if (buf) {
|
||||
buf->Release();
|
||||
view->Release();
|
||||
}
|
||||
if (buf[0]) buf[0]->Release();
|
||||
if (view[0]) view[0]->Release();
|
||||
|
||||
desc.ByteWidth = size * sizeof(TessData);
|
||||
desc.StructureByteStride = sizeof(TessData);
|
||||
|
||||
device_->CreateBuffer(&desc, nullptr, &buf);
|
||||
device_->CreateShaderResourceView(buf, 0, &view);
|
||||
context_->VSSetShaderResources(0, 1, &view);
|
||||
device_->CreateBuffer(&desc, nullptr, &buf[0]);
|
||||
device_->CreateShaderResourceView(buf[0], nullptr, &view[0]);
|
||||
context_->VSSetShaderResources(0, 1, &view[0]);
|
||||
}
|
||||
D3D11_MAPPED_SUBRESOURCE map;
|
||||
context_->Map(buf, 0, D3D11_MAP_WRITE_DISCARD, 0, &map);
|
||||
context_->Map(buf[0], 0, D3D11_MAP_WRITE_DISCARD, 0, &map);
|
||||
uint8_t *data = (uint8_t *)map.pData;
|
||||
|
||||
pos = (float *)(data);
|
||||
tex = (float *)(data + offsetof(TessData, uv));
|
||||
col = (float *)(data + offsetof(TessData, color));
|
||||
posStride = sizeof(TessData) / sizeof(float);
|
||||
colStride = hasColor ? (sizeof(TessData) / sizeof(float)) : 0;
|
||||
texStride = sizeof(TessData) / sizeof(float);
|
||||
}
|
||||
float *pos = (float *)(data);
|
||||
float *tex = (float *)(data + offsetof(TessData, uv));
|
||||
float *col = (float *)(data + offsetof(TessData, color));
|
||||
int stride = sizeof(TessData) / sizeof(float);
|
||||
|
||||
void DrawEngineD3D11::TessellationDataTransferD3D11::SendDataToShader(const float * pos, const float * tex, const float * col, int size, bool hasColor, bool hasTexCoords) {
|
||||
context_->Unmap(buf, 0);
|
||||
CopyControlPoints(pos, tex, col, stride, stride, stride, points, size, vertType);
|
||||
|
||||
context_->Unmap(buf[0], 0);
|
||||
|
||||
using Spline::Weight;
|
||||
|
||||
// Weights U
|
||||
if (prevSizeWU < weights.size_u) {
|
||||
prevSizeWU = weights.size_u;
|
||||
if (buf[1]) buf[1]->Release();
|
||||
if (view[1]) view[1]->Release();
|
||||
|
||||
desc.ByteWidth = weights.size_u * sizeof(Weight);
|
||||
desc.StructureByteStride = sizeof(Weight);
|
||||
device_->CreateBuffer(&desc, nullptr, &buf[1]);
|
||||
device_->CreateShaderResourceView(buf[1], nullptr, &view[1]);
|
||||
context_->VSSetShaderResources(1, 1, &view[1]);
|
||||
}
|
||||
context_->Map(buf[1], 0, D3D11_MAP_WRITE_DISCARD, 0, &map);
|
||||
memcpy(map.pData, weights.u, weights.size_u * sizeof(Weight));
|
||||
context_->Unmap(buf[1], 0);
|
||||
|
||||
// Weights V
|
||||
if (prevSizeWV < weights.size_v) {
|
||||
prevSizeWV = weights.size_v;
|
||||
if (buf[2]) buf[2]->Release();
|
||||
if (view[2]) view[2]->Release();
|
||||
|
||||
desc.ByteWidth = weights.size_v * sizeof(Weight);
|
||||
desc.StructureByteStride = sizeof(Weight);
|
||||
device_->CreateBuffer(&desc, nullptr, &buf[2]);
|
||||
device_->CreateShaderResourceView(buf[2], nullptr, &view[2]);
|
||||
context_->VSSetShaderResources(2, 1, &view[2]);
|
||||
}
|
||||
context_->Map(buf[2], 0, D3D11_MAP_WRITE_DISCARD, 0, &map);
|
||||
memcpy(map.pData, weights.v, weights.size_v * sizeof(Weight));
|
||||
context_->Unmap(buf[2], 0);
|
||||
}
|
||||
|
@ -99,6 +99,22 @@ public:
|
||||
u8 flags;
|
||||
};
|
||||
|
||||
class TessellationDataTransferD3D11 : public TessellationDataTransfer {
|
||||
private:
|
||||
ID3D11DeviceContext *context_;
|
||||
ID3D11Device *device_;
|
||||
ID3D11Buffer *buf[3]{};
|
||||
ID3D11ShaderResourceView *view[3]{};
|
||||
D3D11_BUFFER_DESC desc{};
|
||||
int prevSize = 0;
|
||||
int prevSizeWU = 0, prevSizeWV = 0;
|
||||
public:
|
||||
TessellationDataTransferD3D11(ID3D11DeviceContext *context, ID3D11Device *device);
|
||||
~TessellationDataTransferD3D11();
|
||||
// Send spline/bezier's control points and weights to vertex shader through structured shader buffer.
|
||||
void SendDataToShader(const SimpleVertex *const *points, int size_u, int size_v, u32 vertType, const Spline::Weight2D &weights) override;
|
||||
};
|
||||
|
||||
// Handles transform, lighting and drawing.
|
||||
class DrawEngineD3D11 : public DrawEngineCommon {
|
||||
public:
|
||||
@ -199,29 +215,5 @@ private:
|
||||
D3D11DynamicState dynState_{};
|
||||
|
||||
// Hardware tessellation
|
||||
class TessellationDataTransferD3D11 : public TessellationDataTransfer {
|
||||
private:
|
||||
ID3D11DeviceContext *context_;
|
||||
ID3D11Device *device_;
|
||||
ID3D11Buffer *buf;
|
||||
ID3D11ShaderResourceView *view;
|
||||
D3D11_BUFFER_DESC desc;
|
||||
public:
|
||||
TessellationDataTransferD3D11(ID3D11DeviceContext *context, ID3D11Device *device)
|
||||
: TessellationDataTransfer(), context_(context), device_(device), buf(), view(), desc() {
|
||||
desc.Usage = D3D11_USAGE_DYNAMIC;
|
||||
desc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
|
||||
desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
|
||||
desc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;
|
||||
}
|
||||
~TessellationDataTransferD3D11() {
|
||||
if (buf) {
|
||||
buf->Release();
|
||||
view->Release();
|
||||
}
|
||||
}
|
||||
|
||||
void PrepareBuffers(float *&pos, float *&tex, float *&col, int &posStride, int &texStride, int &colStride, int size, bool hasColor, bool hasTexCoords) override;
|
||||
void SendDataToShader(const float *pos, const float *tex, const float *col, int size, bool hasColor, bool hasTexCoords) override;
|
||||
};
|
||||
TessellationDataTransferD3D11 *tessDataTransferD3D11;
|
||||
};
|
||||
|
@ -95,13 +95,13 @@ DrawEngineDX9::DrawEngineDX9(Draw::DrawContext *draw) : vai_(256), vertexDeclMap
|
||||
// All this is a LOT of memory, need to see if we can cut down somehow.
|
||||
decoded = (u8 *)AllocateMemoryPages(DECODED_VERTEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
|
||||
decIndex = (u16 *)AllocateMemoryPages(DECODED_INDEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
|
||||
splineBuffer = (u8 *)AllocateMemoryPages(SPLINE_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
|
||||
|
||||
indexGen.Setup(decIndex);
|
||||
|
||||
InitDeviceObjects();
|
||||
|
||||
tessDataTransfer = new TessellationDataTransferDX9();
|
||||
tessDataTransferDX9 = new TessellationDataTransferDX9();
|
||||
tessDataTransfer = tessDataTransferDX9;
|
||||
|
||||
device_->CreateVertexDeclaration(TransformedVertexElements, &transformedVertexDecl_);
|
||||
}
|
||||
@ -114,14 +114,13 @@ DrawEngineDX9::~DrawEngineDX9() {
|
||||
DestroyDeviceObjects();
|
||||
FreeMemoryPages(decoded, DECODED_VERTEX_BUFFER_SIZE);
|
||||
FreeMemoryPages(decIndex, DECODED_INDEX_BUFFER_SIZE);
|
||||
FreeMemoryPages(splineBuffer, SPLINE_BUFFER_SIZE);
|
||||
vertexDeclMap_.Iterate([&](const uint32_t &key, IDirect3DVertexDeclaration9 *decl) {
|
||||
if (decl) {
|
||||
decl->Release();
|
||||
}
|
||||
});
|
||||
vertexDeclMap_.Clear();
|
||||
delete tessDataTransfer;
|
||||
delete tessDataTransferDX9;
|
||||
}
|
||||
|
||||
void DrawEngineDX9::InitDeviceObjects() {
|
||||
@ -624,8 +623,8 @@ rotateVBO:
|
||||
GPUDebug::NotifyDraw();
|
||||
}
|
||||
|
||||
void DrawEngineDX9::TessellationDataTransferDX9::SendDataToShader(const float * pos, const float * tex, const float * col, int size, bool hasColor, bool hasTexCoords)
|
||||
{
|
||||
void TessellationDataTransferDX9::SendDataToShader(const SimpleVertex *const *points, int size_u, int size_v, u32 vertType, const Spline::Weight2D &weights) {
|
||||
// TODO
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
@ -97,6 +97,13 @@ public:
|
||||
u8 flags;
|
||||
};
|
||||
|
||||
class TessellationDataTransferDX9 : public TessellationDataTransfer {
|
||||
public:
|
||||
TessellationDataTransferDX9() {}
|
||||
~TessellationDataTransferDX9() {}
|
||||
void SendDataToShader(const SimpleVertex *const *points, int size_u, int size_v, u32 vertType, const Spline::Weight2D &weights) override;
|
||||
};
|
||||
|
||||
// Handles transform, lighting and drawing.
|
||||
class DrawEngineDX9 : public DrawEngineCommon {
|
||||
public:
|
||||
@ -158,16 +165,7 @@ private:
|
||||
FramebufferManagerDX9 *framebufferManager_ = nullptr;
|
||||
|
||||
// Hardware tessellation
|
||||
class TessellationDataTransferDX9 : public TessellationDataTransfer {
|
||||
private:
|
||||
int data_tex[3];
|
||||
public:
|
||||
TessellationDataTransferDX9() : TessellationDataTransfer(), data_tex() {
|
||||
}
|
||||
~TessellationDataTransferDX9() {
|
||||
}
|
||||
void SendDataToShader(const float *pos, const float *tex, const float *col, int size, bool hasColor, bool hasTexCoords) override;
|
||||
};
|
||||
TessellationDataTransferDX9 *tessDataTransferDX9;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
@ -86,6 +86,7 @@ void GenerateVertexShaderHLSL(const VShaderID &id, char *buffer, ShaderLanguage
|
||||
bool doSpline = id.Bit(VS_BIT_SPLINE);
|
||||
bool hasColorTess = id.Bit(VS_BIT_HAS_COLOR_TESS);
|
||||
bool hasTexcoordTess = id.Bit(VS_BIT_HAS_TEXCOORD_TESS);
|
||||
bool hasNormalTess = id.Bit(VS_BIT_HAS_NORMAL_TESS);
|
||||
bool flipNormalTess = id.Bit(VS_BIT_NORM_REVERSE_TESS);
|
||||
|
||||
DoLightComputation doLight[4] = { LIGHT_OFF, LIGHT_OFF, LIGHT_OFF, LIGHT_OFF };
|
||||
@ -271,82 +272,90 @@ void GenerateVertexShaderHLSL(const VShaderID &id, char *buffer, ShaderLanguage
|
||||
WRITE(p, " float3 pos; float pad1;\n");
|
||||
WRITE(p, " float2 tex; float2 pad2;\n");
|
||||
WRITE(p, " float4 col;\n");
|
||||
WRITE(p, "};");
|
||||
WRITE(p, "};\n");
|
||||
WRITE(p, "StructuredBuffer<TessData> tess_data : register(t0);\n");
|
||||
|
||||
WRITE(p, "struct TessWeight {\n");
|
||||
WRITE(p, " float4 basis;\n");
|
||||
WRITE(p, " float4 deriv;\n");
|
||||
WRITE(p, "};\n");
|
||||
WRITE(p, "StructuredBuffer<TessWeight> tess_weights_u : register(t1);\n");
|
||||
WRITE(p, "StructuredBuffer<TessWeight> tess_weights_v : register(t2);\n");
|
||||
}
|
||||
|
||||
const char *init[3] = { "0.0, 0.0", "0.0, 0.0, 0.0", "0.0, 0.0, 0.0, 0.0" };
|
||||
for (int i = 2; i <= 4; i++) {
|
||||
// Define 3 types float2, float3, float4
|
||||
WRITE(p, "float%d tess_sample(in float%d points[16], in float2 weights[4]) {\n", i, i);
|
||||
WRITE(p, "float%d tess_sample(in float%d points[16], float4x4 weights) {\n", i, i);
|
||||
WRITE(p, " float%d pos = float%d(%s);\n", i, i, init[i - 2]);
|
||||
WRITE(p, " for (int i = 0; i < 4; ++i) {\n");
|
||||
WRITE(p, " for (int j = 0; j < 4; ++j) {\n");
|
||||
WRITE(p, " float f = weights[j].x * weights[i].y;\n");
|
||||
WRITE(p, " if (f != 0.0)\n");
|
||||
WRITE(p, " pos = pos + f * points[i * 4 + j];\n");
|
||||
WRITE(p, " }\n");
|
||||
WRITE(p, " }\n");
|
||||
for (int v = 0; v < 4; ++v) {
|
||||
for (int u = 0; u < 4; ++u) {
|
||||
WRITE(p, " pos += weights[%i][%i] * points[%i];\n", v, u, v * 4 + u);
|
||||
}
|
||||
}
|
||||
WRITE(p, " return pos;\n");
|
||||
WRITE(p, "}\n");
|
||||
}
|
||||
if (doSpline) {
|
||||
WRITE(p, "void spline_knot(int2 num_patches, int2 type, out float2 knot[6], int2 patch_pos) {\n");
|
||||
WRITE(p, " for (int i = 0; i < 6; ++i) {\n");
|
||||
WRITE(p, " knot[i] = float2(i + patch_pos.x - 2, i + patch_pos.y - 2);\n");
|
||||
WRITE(p, " }\n");
|
||||
// WRITE(p, " if ((type.x & 1) != 0) {\n");
|
||||
WRITE(p, " if ((type.x == 1) || (type.x == 3)) {\n");
|
||||
WRITE(p, " if (patch_pos.x <= 2)\n");
|
||||
WRITE(p, " knot[0].x = 0.0;\n");
|
||||
WRITE(p, " if (patch_pos.x <= 1)\n");
|
||||
WRITE(p, " knot[1].x = 0.0;\n");
|
||||
WRITE(p, " }\n");
|
||||
// WRITE(p, " if ((type.x & 2) != 0) {\n");
|
||||
WRITE(p, " if ((type.x == 2) || (type.x == 3)) {\n");
|
||||
WRITE(p, " if (patch_pos.x >= (num_patches.x - 2))\n");
|
||||
WRITE(p, " knot[5].x = num_patches.x;\n");
|
||||
WRITE(p, " if (patch_pos.x == (num_patches.x - 1))\n");
|
||||
WRITE(p, " knot[4].x = num_patches.x;\n");
|
||||
WRITE(p, " }\n");
|
||||
// WRITE(p, " if ((type.y & 1) != 0) {\n");
|
||||
WRITE(p, " if ((type.y == 1) || (type.y == 3)) {\n");
|
||||
WRITE(p, " if (patch_pos.y <= 2)\n");
|
||||
WRITE(p, " knot[0].y = 0.0;\n");
|
||||
WRITE(p, " if (patch_pos.y <= 1)\n");
|
||||
WRITE(p, " knot[1].y = 0.0;\n");
|
||||
WRITE(p, " }\n");
|
||||
// WRITE(p, " if ((type.y & 2) != 0) {\n");
|
||||
WRITE(p, " if ((type.y == 2) || (type.y == 3)) {\n");
|
||||
WRITE(p, " if (patch_pos.y >= (num_patches.y - 2))\n");
|
||||
WRITE(p, " knot[5].y = num_patches.y;\n");
|
||||
WRITE(p, " if (patch_pos.y == (num_patches.y - 1))\n");
|
||||
WRITE(p, " knot[4].y = num_patches.y;\n");
|
||||
WRITE(p, " }\n");
|
||||
WRITE(p, "}\n");
|
||||
|
||||
WRITE(p, "void spline_weight(float2 t, in float2 knot[6], out float2 weights[4]) {\n");
|
||||
// TODO: Maybe compilers could be coaxed into vectorizing this code without the above explicitly...
|
||||
WRITE(p, " float2 t0 = (t - knot[0]);\n");
|
||||
WRITE(p, " float2 t1 = (t - knot[1]);\n");
|
||||
WRITE(p, " float2 t2 = (t - knot[2]);\n");
|
||||
// TODO: All our knots are integers so we should be able to get rid of these divisions (How?)
|
||||
WRITE(p, " float2 f30 = t0 / (knot[3] - knot[0]);\n");
|
||||
WRITE(p, " float2 f41 = t1 / (knot[4] - knot[1]);\n");
|
||||
WRITE(p, " float2 f52 = t2 / (knot[5] - knot[2]);\n");
|
||||
WRITE(p, " float2 f31 = t1 / (knot[3] - knot[1]);\n");
|
||||
WRITE(p, " float2 f42 = t2 / (knot[4] - knot[2]);\n");
|
||||
WRITE(p, " float2 f32 = t2 / (knot[3] - knot[2]);\n");
|
||||
WRITE(p, " float2 a = (1.0 - f30)*(1.0 - f31);\n");
|
||||
WRITE(p, " float2 b = (f31*f41);\n");
|
||||
WRITE(p, " float2 c = (1.0 - f41)*(1.0 - f42);\n");
|
||||
WRITE(p, " float2 d = (f42*f52);\n");
|
||||
WRITE(p, " weights[0] = a - (a*f32);\n");
|
||||
WRITE(p, " weights[1] = 1.0 - a - b + ((a + b + c - 1.0)*f32);\n");
|
||||
WRITE(p, " weights[2] = b + ((1.0 - b - c - d)*f32);\n");
|
||||
WRITE(p, " weights[3] = d*f32;\n");
|
||||
WRITE(p, "}\n");
|
||||
WRITE(p, "float4x4 outerProduct(float4 u, float4 v) {\n");
|
||||
WRITE(p, " return mul((float4x1)v, (float1x4)u);\n");
|
||||
WRITE(p, "}\n");
|
||||
|
||||
WRITE(p, "struct Tess {\n");
|
||||
WRITE(p, " float3 pos;\n");
|
||||
if (doTexture)
|
||||
WRITE(p, " float2 tex;\n");
|
||||
WRITE(p, " float4 col;\n");
|
||||
if (hasNormalTess)
|
||||
WRITE(p, " float3 nrm;\n");
|
||||
WRITE(p, "};\n");
|
||||
|
||||
WRITE(p, "void tessellate(in VS_IN In, out Tess tess) {\n");
|
||||
WRITE(p, " int2 point_pos = int2(In.position.z, In.normal.z)%s;\n", doBezier ? " * 3" : "");
|
||||
WRITE(p, " int2 weight_idx = int2(In.position.xy);\n");
|
||||
// Load 4x4 control points
|
||||
WRITE(p, " float3 _pos[16];\n");
|
||||
WRITE(p, " float2 _tex[16];\n");
|
||||
WRITE(p, " float4 _col[16];\n");
|
||||
WRITE(p, " int index;\n");
|
||||
for (int i = 0; i < 4; i++) {
|
||||
for (int j = 0; j < 4; j++) {
|
||||
WRITE(p, " index = (%i + point_pos.y) * u_spline_counts + (%i + point_pos.x);\n", i, j);
|
||||
WRITE(p, " _pos[%i] = tess_data[index].pos;\n", i * 4 + j);
|
||||
if (doTexture && hasTexcoordTess)
|
||||
WRITE(p, " _tex[%i] = tess_data[index].tex;\n", i * 4 + j);
|
||||
if (hasColorTess)
|
||||
WRITE(p, " _col[%i] = tess_data[index].col;\n", i * 4 + j);
|
||||
}
|
||||
}
|
||||
|
||||
// Basis polynomials as weight coefficients
|
||||
WRITE(p, " float4 basis_u = tess_weights_u[weight_idx.x].basis;\n");
|
||||
WRITE(p, " float4 basis_v = tess_weights_v[weight_idx.y].basis;\n");
|
||||
WRITE(p, " float4x4 basis = outerProduct(basis_u, basis_v);\n");
|
||||
|
||||
// Tessellate
|
||||
WRITE(p, " tess.pos = tess_sample(_pos, basis);\n");
|
||||
if (doTexture) {
|
||||
if (hasTexcoordTess)
|
||||
WRITE(p, " tess.tex = tess_sample(_tex, basis);\n");
|
||||
else
|
||||
WRITE(p, " tess.tex = In.normal.xy;\n");
|
||||
}
|
||||
if (hasColorTess)
|
||||
WRITE(p, " tess.col = tess_sample(_col, basis);\n");
|
||||
else
|
||||
WRITE(p, " tess.col = u_matambientalpha;\n");
|
||||
if (hasNormalTess) {
|
||||
// Derivatives as weight coefficients
|
||||
WRITE(p, " float4 deriv_u = tess_weights_u[weight_idx.x].deriv;\n");
|
||||
WRITE(p, " float4 deriv_v = tess_weights_v[weight_idx.y].deriv;\n");
|
||||
|
||||
WRITE(p, " float3 du = tess_sample(_pos, outerProduct(deriv_u, basis_v));\n");
|
||||
WRITE(p, " float3 dv = tess_sample(_pos, outerProduct(basis_u, deriv_v));\n");
|
||||
WRITE(p, " tess.nrm = normalize(cross(du, dv));\n");
|
||||
}
|
||||
WRITE(p, "}\n");
|
||||
}
|
||||
|
||||
WRITE(p, "VS_OUT main(VS_IN In) {\n");
|
||||
@ -396,106 +405,14 @@ void GenerateVertexShaderHLSL(const VShaderID &id, char *buffer, ShaderLanguage
|
||||
} else {
|
||||
// Step 1: World Transform / Skinning
|
||||
if (!enableBones) {
|
||||
// Hardware tessellation
|
||||
if (doSpline || doBezier) {
|
||||
WRITE(p, " uint u_spline_count_u = u_spline_counts & 0xFF;\n");
|
||||
WRITE(p, " uint u_spline_count_v = (u_spline_counts >> 8) & 0xFF;\n");
|
||||
WRITE(p, " uint num_patches_u = %s;\n", doBezier ? "(u_spline_count_u - 1) / 3u" : "u_spline_count_u - 3");
|
||||
WRITE(p, " float2 tess_pos = In.position.xy;\n");
|
||||
WRITE(p, " int u = In.instanceId %% num_patches_u;\n");
|
||||
WRITE(p, " int v = In.instanceId / num_patches_u;\n");
|
||||
WRITE(p, " int2 patch_pos = int2(u, v);\n");
|
||||
WRITE(p, " float3 _pos[16];\n");
|
||||
WRITE(p, " float2 _tex[16];\n");
|
||||
WRITE(p, " float4 _col[16];\n");
|
||||
WRITE(p, " int index;\n");
|
||||
for (int i = 0; i < 4; i++) {
|
||||
for (int j = 0; j < 4; j++) {
|
||||
WRITE(p, " index = (%i + v%s) * u_spline_count_u + (%i + u%s);\n", i, doBezier ? " * 3" : "", j, doBezier ? " * 3" : "");
|
||||
WRITE(p, " _pos[%i] = tess_data[index].pos;\n", i * 4 + j);
|
||||
if (doTexture && hasTexcoord && hasTexcoordTess)
|
||||
WRITE(p, " _tex[%i] = tess_data[index].tex;\n", i * 4 + j);
|
||||
if (hasColor && hasColorTess)
|
||||
WRITE(p, " _col[%i] = tess_data[index].col;\n", i * 4 + j);
|
||||
}
|
||||
}
|
||||
WRITE(p, " float2 weights[4];\n");
|
||||
if (doBezier) {
|
||||
// Bernstein 3D
|
||||
WRITE(p, " weights[0] = (1.0 - tess_pos) * (1.0 - tess_pos) * (1.0 - tess_pos);\n");
|
||||
WRITE(p, " weights[1] = 3.0 * tess_pos * (1.0 - tess_pos) * (1.0 - tess_pos);\n");
|
||||
WRITE(p, " weights[2] = 3.0 * tess_pos * tess_pos * (1.0 - tess_pos);\n");
|
||||
WRITE(p, " weights[3] = tess_pos * tess_pos * tess_pos;\n");
|
||||
} else if (doSpline) {
|
||||
WRITE(p, " int2 spline_num_patches = int2(u_spline_count_u - 3, u_spline_count_v - 3);\n");
|
||||
WRITE(p, " int u_spline_type_u = (u_spline_counts >> 16) & 0xFF;\n");
|
||||
WRITE(p, " int u_spline_type_v = (u_spline_counts >> 24) & 0xFF;\n");
|
||||
WRITE(p, " int2 spline_type = int2(u_spline_type_u, u_spline_type_v);\n");
|
||||
WRITE(p, " float2 knots[6];\n");
|
||||
WRITE(p, " spline_knot(spline_num_patches, spline_type, knots, patch_pos);\n");
|
||||
WRITE(p, " spline_weight(tess_pos + patch_pos, knots, weights);\n");
|
||||
}
|
||||
WRITE(p, " float3 pos = tess_sample(_pos, weights);\n");
|
||||
if (doTexture && hasTexcoord) {
|
||||
if (hasTexcoordTess)
|
||||
WRITE(p, " float2 tex = tess_sample(_tex, weights);\n");
|
||||
else
|
||||
WRITE(p, " float2 tex = tess_pos + patch_pos;\n");
|
||||
}
|
||||
if (hasColor) {
|
||||
if (hasColorTess)
|
||||
WRITE(p, " float4 col = tess_sample(_col, weights);\n");
|
||||
else
|
||||
WRITE(p, " float4 col = tess_data[0].col;\n");
|
||||
}
|
||||
if (hasNormal) {
|
||||
// Curved surface is probably always need to compute normal(not sampling from control points)
|
||||
if (doBezier) {
|
||||
// Bernstein derivative
|
||||
WRITE(p, " float2 bernderiv[4];\n");
|
||||
WRITE(p, " bernderiv[0] = -3.0 * (tess_pos - 1.0) * (tess_pos - 1.0); \n");
|
||||
WRITE(p, " bernderiv[1] = 9.0 * tess_pos * tess_pos - 12.0 * tess_pos + 3.0; \n");
|
||||
WRITE(p, " bernderiv[2] = 3.0 * (2.0 - 3.0 * tess_pos) * tess_pos; \n");
|
||||
WRITE(p, " bernderiv[3] = 3.0 * tess_pos * tess_pos; \n");
|
||||
// Hardware tessellation
|
||||
WRITE(p, " Tess tess;\n");
|
||||
WRITE(p, " tessellate(In, tess);\n");
|
||||
|
||||
WRITE(p, " float2 bernderiv_u[4];\n");
|
||||
WRITE(p, " float2 bernderiv_v[4];\n");
|
||||
WRITE(p, " for (int i = 0; i < 4; i++) {\n");
|
||||
WRITE(p, " bernderiv_u[i] = float2(bernderiv[i].x, weights[i].y);\n");
|
||||
WRITE(p, " bernderiv_v[i] = float2(weights[i].x, bernderiv[i].y);\n");
|
||||
WRITE(p, " }\n");
|
||||
|
||||
WRITE(p, " float3 du = tess_sample(_pos, bernderiv_u);\n");
|
||||
WRITE(p, " float3 dv = tess_sample(_pos, bernderiv_v);\n");
|
||||
} else if (doSpline) {
|
||||
WRITE(p, " float2 tess_next_u = float2(In.normal.x, 0.0);\n");
|
||||
WRITE(p, " float2 tess_next_v = float2(0.0, In.normal.y);\n");
|
||||
// Right
|
||||
WRITE(p, " float2 tess_pos_r = tess_pos + tess_next_u;\n");
|
||||
WRITE(p, " spline_weight(tess_pos_r + patch_pos, knots, weights);\n");
|
||||
WRITE(p, " float3 pos_r = tess_sample(_pos, weights);\n");
|
||||
// Left
|
||||
WRITE(p, " float2 tess_pos_l = tess_pos - tess_next_u;\n");
|
||||
WRITE(p, " spline_weight(tess_pos_l + patch_pos, knots, weights);\n");
|
||||
WRITE(p, " float3 pos_l = tess_sample(_pos, weights);\n");
|
||||
// Down
|
||||
WRITE(p, " float2 tess_pos_d = tess_pos + tess_next_v;\n");
|
||||
WRITE(p, " spline_weight(tess_pos_d + patch_pos, knots, weights);\n");
|
||||
WRITE(p, " float3 pos_d = tess_sample(_pos, weights);\n");
|
||||
// Up
|
||||
WRITE(p, " float2 tess_pos_u = tess_pos - tess_next_v;\n");
|
||||
WRITE(p, " spline_weight(tess_pos_u + patch_pos, knots, weights);\n");
|
||||
WRITE(p, " float3 pos_u = tess_sample(_pos, weights);\n");
|
||||
|
||||
WRITE(p, " float3 du = pos_r - pos_l;\n");
|
||||
WRITE(p, " float3 dv = pos_d - pos_u;\n");
|
||||
}
|
||||
WRITE(p, " float3 nrm = cross(du, dv);\n");
|
||||
WRITE(p, " nrm = normalize(nrm);\n");
|
||||
}
|
||||
WRITE(p, " float3 worldpos = mul(float4(pos.xyz, 1.0), u_world);\n");
|
||||
if (hasNormal)
|
||||
WRITE(p, " float3 worldnormal = normalize(mul(float4(%snrm, 0.0), u_world));\n", flipNormalTess ? "-" : "");
|
||||
WRITE(p, " float3 worldpos = mul(float4(tess.pos.xyz, 1.0), u_world);\n");
|
||||
if (hasNormalTess)
|
||||
WRITE(p, " float3 worldnormal = normalize(mul(float4(%stess.nrm, 0.0), u_world));\n", flipNormalTess ? "-" : "");
|
||||
else
|
||||
WRITE(p, " float3 worldnormal = float3(0.0, 0.0, 1.0);\n");
|
||||
} else {
|
||||
@ -600,9 +517,10 @@ void GenerateVertexShaderHLSL(const VShaderID &id, char *buffer, ShaderLanguage
|
||||
const char *diffuseStr = (matUpdate & 2) && hasColor ? "In.color0.rgb" : "u_matdiffuse";
|
||||
const char *specularStr = (matUpdate & 4) && hasColor ? "In.color0.rgb" : "u_matspecular.rgb";
|
||||
if (doBezier || doSpline) {
|
||||
ambientStr = (matUpdate & 1) && hasColor ? "col" : "u_matambientalpha";
|
||||
diffuseStr = (matUpdate & 2) && hasColor ? "col.rgb" : "u_matdiffuse";
|
||||
specularStr = (matUpdate & 4) && hasColor ? "col.rgb" : "u_matspecular.rgb";
|
||||
// TODO: Probably, should use hasColorTess but FF4 has a problem with drawing the background.
|
||||
ambientStr = (matUpdate & 1) && hasColor ? "tess.col" : "u_matambientalpha";
|
||||
diffuseStr = (matUpdate & 2) && hasColor ? "tess.col.rgb" : "u_matdiffuse";
|
||||
specularStr = (matUpdate & 4) && hasColor ? "tess.col.rgb" : "u_matspecular.rgb";
|
||||
}
|
||||
|
||||
bool diffuseIsZero = true;
|
||||
@ -729,7 +647,7 @@ void GenerateVertexShaderHLSL(const VShaderID &id, char *buffer, ShaderLanguage
|
||||
// Lighting doesn't affect color.
|
||||
if (hasColor) {
|
||||
if (doBezier || doSpline)
|
||||
WRITE(p, " Out.v_color0 = col;\n");
|
||||
WRITE(p, " Out.v_color0 = tess.col;\n");
|
||||
else
|
||||
WRITE(p, " Out.v_color0 = In.color0;\n");
|
||||
} else {
|
||||
@ -747,7 +665,7 @@ void GenerateVertexShaderHLSL(const VShaderID &id, char *buffer, ShaderLanguage
|
||||
if (scaleUV) {
|
||||
if (hasTexcoord) {
|
||||
if (doBezier || doSpline)
|
||||
WRITE(p, " Out.v_texcoord = float3(tex.xy * u_uvscaleoffset.xy + u_uvscaleoffset.zw, 0.0);\n");
|
||||
WRITE(p, " Out.v_texcoord = float3(tess.tex.xy * u_uvscaleoffset.xy + u_uvscaleoffset.zw, 0.0);\n");
|
||||
else
|
||||
WRITE(p, " Out.v_texcoord = float3(In.texcoord.xy * u_uvscaleoffset.xy, 0.0);\n");
|
||||
} else {
|
||||
@ -755,10 +673,7 @@ void GenerateVertexShaderHLSL(const VShaderID &id, char *buffer, ShaderLanguage
|
||||
}
|
||||
} else {
|
||||
if (hasTexcoord) {
|
||||
if (doBezier || doSpline)
|
||||
WRITE(p, " Out.v_texcoord = float3(tex.xy * u_uvscaleoffset.xy + u_uvscaleoffset.zw, 0.0);\n");
|
||||
else
|
||||
WRITE(p, " Out.v_texcoord = float3(In.texcoord.xy * u_uvscaleoffset.xy + u_uvscaleoffset.zw, 0.0);\n");
|
||||
WRITE(p, " Out.v_texcoord = float3(In.texcoord.xy * u_uvscaleoffset.xy + u_uvscaleoffset.zw, 0.0);\n");
|
||||
} else {
|
||||
WRITE(p, " Out.v_texcoord = float3(u_uvscaleoffset.zw, 0.0);\n");
|
||||
}
|
||||
|
@ -81,22 +81,21 @@ DrawEngineGLES::DrawEngineGLES(Draw::DrawContext *draw) : vai_(256), draw_(draw)
|
||||
// All this is a LOT of memory, need to see if we can cut down somehow.
|
||||
decoded = (u8 *)AllocateMemoryPages(DECODED_VERTEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
|
||||
decIndex = (u16 *)AllocateMemoryPages(DECODED_INDEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
|
||||
splineBuffer = (u8 *)AllocateMemoryPages(SPLINE_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
|
||||
|
||||
indexGen.Setup(decIndex);
|
||||
|
||||
InitDeviceObjects();
|
||||
|
||||
tessDataTransfer = new TessellationDataTransferGLES(render_);
|
||||
tessDataTransferGLES = new TessellationDataTransferGLES(render_);
|
||||
tessDataTransfer = tessDataTransferGLES;
|
||||
}
|
||||
|
||||
DrawEngineGLES::~DrawEngineGLES() {
|
||||
DestroyDeviceObjects();
|
||||
FreeMemoryPages(decoded, DECODED_VERTEX_BUFFER_SIZE);
|
||||
FreeMemoryPages(decIndex, DECODED_INDEX_BUFFER_SIZE);
|
||||
FreeMemoryPages(splineBuffer, SPLINE_BUFFER_SIZE);
|
||||
|
||||
delete tessDataTransfer;
|
||||
delete tessDataTransferGLES;
|
||||
}
|
||||
|
||||
void DrawEngineGLES::DeviceLost() {
|
||||
@ -166,7 +165,7 @@ void DrawEngineGLES::EndFrame() {
|
||||
FrameData &frameData = frameData_[render_->GetCurFrame()];
|
||||
render_->EndPushBuffer(frameData.pushIndex);
|
||||
render_->EndPushBuffer(frameData.pushVertex);
|
||||
tessDataTransfer->EndFrame();
|
||||
tessDataTransferGLES->EndFrame();
|
||||
}
|
||||
|
||||
struct GlTypeInfo {
|
||||
@ -520,10 +519,7 @@ rotateVBO:
|
||||
indexBufferOffset = (uint32_t)frameData.pushIndex->Push(decIndex, sizeof(uint16_t) * indexGen.VertexCount(), &indexBuffer);
|
||||
render_->BindIndexBuffer(indexBuffer);
|
||||
}
|
||||
if (gstate_c.bezier || gstate_c.spline)
|
||||
render_->DrawIndexed(glprim[prim], vertexCount, GL_UNSIGNED_SHORT, (GLvoid*)(intptr_t)indexBufferOffset, numPatches);
|
||||
else
|
||||
render_->DrawIndexed(glprim[prim], vertexCount, GL_UNSIGNED_SHORT, (GLvoid*)(intptr_t)indexBufferOffset);
|
||||
render_->DrawIndexed(glprim[prim], vertexCount, GL_UNSIGNED_SHORT, (GLvoid*)(intptr_t)indexBufferOffset);
|
||||
} else {
|
||||
render_->Draw(glprim[prim], 0, vertexCount);
|
||||
}
|
||||
@ -655,46 +651,66 @@ bool DrawEngineGLES::IsCodePtrVertexDecoder(const u8 *ptr) const {
|
||||
return decJitCache_->IsInSpace(ptr);
|
||||
}
|
||||
|
||||
void DrawEngineGLES::TessellationDataTransferGLES::SendDataToShader(const float *pos, const float *tex, const float *col, int size, bool hasColor, bool hasTexCoords) {
|
||||
void TessellationDataTransferGLES::SendDataToShader(const SimpleVertex *const *points, int size_u, int size_v, u32 vertType, const Spline::Weight2D &weights) {
|
||||
bool hasColor = (vertType & GE_VTYPE_COL_MASK) != 0;
|
||||
bool hasTexCoord = (vertType & GE_VTYPE_TC_MASK) != 0;
|
||||
|
||||
int size = size_u * size_v;
|
||||
float *pos = new float[size * 4];
|
||||
float *tex = hasTexCoord ? new float[size * 4] : nullptr;
|
||||
float *col = hasColor ? new float[size * 4] : nullptr;
|
||||
int stride = 4;
|
||||
|
||||
CopyControlPoints(pos, tex, col, stride, stride, stride, points, size, vertType);
|
||||
// Removed the 1D texture support, it's unlikely to be relevant for performance.
|
||||
if (data_tex[0])
|
||||
renderManager_->DeleteTexture(data_tex[0]);
|
||||
uint8_t *pos_data = new uint8_t[size * sizeof(float) * 4];
|
||||
memcpy(pos_data, pos, size * sizeof(float) * 4);
|
||||
data_tex[0] = renderManager_->CreateTexture(GL_TEXTURE_2D);
|
||||
renderManager_->TextureImage(data_tex[0], 0, size, 1, GL_RGBA32F, GL_RGBA, GL_FLOAT, pos_data, GLRAllocType::NEW, false);
|
||||
renderManager_->FinalizeTexture(data_tex[0], 0, false);
|
||||
renderManager_->BindTexture(TEX_SLOT_SPLINE_POS, data_tex[0]);
|
||||
|
||||
// Texcoords
|
||||
if (hasTexCoords) {
|
||||
if (data_tex[1])
|
||||
renderManager_->DeleteTexture(data_tex[1]);
|
||||
uint8_t *tex_data = new uint8_t[size * sizeof(float) * 4];
|
||||
memcpy(tex_data, tex, size * sizeof(float) * 4);
|
||||
data_tex[1] = renderManager_->CreateTexture(GL_TEXTURE_2D);
|
||||
renderManager_->TextureImage(data_tex[1], 0, size, 1, GL_RGBA32F, GL_RGBA, GL_FLOAT, tex_data, GLRAllocType::NEW, false);
|
||||
renderManager_->FinalizeTexture(data_tex[1], 0, false);
|
||||
renderManager_->BindTexture(TEX_SLOT_SPLINE_NRM, data_tex[1]);
|
||||
// Control Points
|
||||
if (prevSizeU < size_u || prevSizeV < size_v) {
|
||||
prevSizeU = size_u;
|
||||
prevSizeV = size_v;
|
||||
if (!data_tex[0])
|
||||
data_tex[0] = renderManager_->CreateTexture(GL_TEXTURE_2D);
|
||||
renderManager_->TextureImage(data_tex[0], 0, size_u * 3, size_v, GL_RGBA32F, GL_RGBA, GL_FLOAT, nullptr, GLRAllocType::NONE, false);
|
||||
renderManager_->FinalizeTexture(data_tex[0], 0, false);
|
||||
}
|
||||
renderManager_->BindTexture(TEX_SLOT_SPLINE_POINTS, data_tex[0]);
|
||||
// Position
|
||||
renderManager_->TextureSubImage(data_tex[0], 0, 0, 0, size_u, size_v, GL_RGBA, GL_FLOAT, (u8 *)pos, GLRAllocType::NEW);
|
||||
// Texcoord
|
||||
if (hasTexCoord)
|
||||
renderManager_->TextureSubImage(data_tex[0], 0, size_u, 0, size_u, size_v, GL_RGBA, GL_FLOAT, (u8 *)tex, GLRAllocType::NEW);
|
||||
// Color
|
||||
if (hasColor)
|
||||
renderManager_->TextureSubImage(data_tex[0], 0, size_u * 2, 0, size_u, size_v, GL_RGBA, GL_FLOAT, (u8 *)col, GLRAllocType::NEW);
|
||||
|
||||
if (data_tex[2])
|
||||
renderManager_->DeleteTexture(data_tex[2]);
|
||||
data_tex[2] = renderManager_->CreateTexture(GL_TEXTURE_2D);
|
||||
int sizeColor = hasColor ? size : 1;
|
||||
uint8_t *col_data = new uint8_t[sizeColor * sizeof(float) * 4];
|
||||
memcpy(col_data, col, sizeColor * sizeof(float) * 4);
|
||||
// Weight U
|
||||
if (prevSizeWU < weights.size_u) {
|
||||
prevSizeWU = weights.size_u;
|
||||
if (!data_tex[1])
|
||||
data_tex[1] = renderManager_->CreateTexture(GL_TEXTURE_2D);
|
||||
renderManager_->TextureImage(data_tex[1], 0, weights.size_u * 2, 1, GL_RGBA32F, GL_RGBA, GL_FLOAT, nullptr, GLRAllocType::NONE, false);
|
||||
renderManager_->FinalizeTexture(data_tex[1], 0, false);
|
||||
}
|
||||
renderManager_->BindTexture(TEX_SLOT_SPLINE_WEIGHTS_U, data_tex[1]);
|
||||
renderManager_->TextureSubImage(data_tex[1], 0, 0, 0, weights.size_u * 2, 1, GL_RGBA, GL_FLOAT, (u8 *)weights.u, GLRAllocType::NONE);
|
||||
|
||||
renderManager_->TextureImage(data_tex[2], 0, sizeColor, 1, GL_RGBA32F, GL_RGBA, GL_FLOAT, col_data, GLRAllocType::NEW, false);
|
||||
renderManager_->FinalizeTexture(data_tex[2], 0, false);
|
||||
renderManager_->BindTexture(TEX_SLOT_SPLINE_COL, data_tex[2]);
|
||||
// Weight V
|
||||
if (prevSizeWV < weights.size_v) {
|
||||
prevSizeWV = weights.size_v;
|
||||
if (!data_tex[2])
|
||||
data_tex[2] = renderManager_->CreateTexture(GL_TEXTURE_2D);
|
||||
renderManager_->TextureImage(data_tex[2], 0, weights.size_v * 2, 1, GL_RGBA32F, GL_RGBA, GL_FLOAT, nullptr, GLRAllocType::NONE, false);
|
||||
renderManager_->FinalizeTexture(data_tex[2], 0, false);
|
||||
}
|
||||
renderManager_->BindTexture(TEX_SLOT_SPLINE_WEIGHTS_V, data_tex[2]);
|
||||
renderManager_->TextureSubImage(data_tex[2], 0, 0, 0, weights.size_v * 2, 1, GL_RGBA, GL_FLOAT, (u8 *)weights.v, GLRAllocType::NONE);
|
||||
}
|
||||
|
||||
void DrawEngineGLES::TessellationDataTransferGLES::EndFrame() {
|
||||
// Releases all three tessellation data textures and resets the cached sizes.
// Called from DrawEngineGLES::EndFrame() and from this class's destructor.
void TessellationDataTransferGLES::EndFrame() {
|
||||
for (int i = 0; i < 3; i++) {
|
||||
if (data_tex[i]) {
|
||||
// Hand the texture to the render manager for deletion and drop our pointer
|
||||
// so that SendDataToShader() creates a fresh texture next time.
|
||||
renderManager_->DeleteTexture(data_tex[i]);
|
||||
data_tex[i] = nullptr;
|
||||
}
|
||||
}
|
||||
// Zero the remembered dimensions: SendDataToShader() only (re)allocates texture
|
||||
// storage when a requested size exceeds these, so any non-zero size will now
|
||||
// trigger allocation of the textures released above.
|
||||
prevSizeU = prevSizeV = prevSizeWU = prevSizeWV = 0;
|
||||
}
|
||||
|
@ -46,9 +46,9 @@ enum {
|
||||
TEX_SLOT_SHADERBLEND_SRC = 1,
|
||||
TEX_SLOT_ALPHATEST = 2,
|
||||
TEX_SLOT_CLUT = 3,
|
||||
TEX_SLOT_SPLINE_POS = 4,
|
||||
TEX_SLOT_SPLINE_NRM = 5,
|
||||
TEX_SLOT_SPLINE_COL = 6,
|
||||
TEX_SLOT_SPLINE_POINTS = 4,
|
||||
TEX_SLOT_SPLINE_WEIGHTS_U = 5,
|
||||
TEX_SLOT_SPLINE_WEIGHTS_V = 6,
|
||||
};
|
||||
|
||||
|
||||
@ -110,6 +110,23 @@ public:
|
||||
u8 flags;
|
||||
};
|
||||
|
||||
// OpenGL (ES) implementation of TessellationDataTransfer.
// Uploads spline/bezier control points and precomputed weights into float
// textures through the GLRenderManager so the vertex shader can read them.
class TessellationDataTransferGLES : public TessellationDataTransfer {
|
||||
private:
|
||||
// [0] = control points (position / texcoord / color regions side by side along x),
|
||||
// [1] = U weights, [2] = V weights. Null until first needed; cleared by EndFrame().
|
||||
GLRTexture *data_tex[3]{};
|
||||
// Control point grid size that data_tex[0] was last allocated for.
|
||||
// Storage is only re-specified when a larger size_u or size_v is requested.
|
||||
int prevSizeU = 0, prevSizeV = 0;
|
||||
// Weight counts that data_tex[1] (U) and data_tex[2] (V) were last allocated for.
|
||||
int prevSizeWU = 0, prevSizeWV = 0;
|
||||
// Render manager used to create, fill, bind and delete the textures.
|
||||
// NOTE(review): appears to be non-owning (never deleted by this class) - confirm.
|
||||
GLRenderManager *renderManager_;
|
||||
public:
|
||||
TessellationDataTransferGLES(GLRenderManager *renderManager)
|
||||
: renderManager_(renderManager) { }
|
||||
// Releases any textures still held by going through EndFrame().
|
||||
~TessellationDataTransferGLES() {
|
||||
EndFrame();
|
||||
}
|
||||
// Send spline/bezier's control points and weights to vertex shader through floating point texture.
|
||||
// points:         size_u * size_v control point pointers.
|
||||
// size_u, size_v: dimensions of the control point grid.
|
||||
// vertType:       checked against GE_VTYPE_COL_MASK / GE_VTYPE_TC_MASK to decide
|
||||
//                 whether color and texcoord data are uploaded as well.
|
||||
// weights:        precomputed weights for the U and V directions.
|
||||
void SendDataToShader(const SimpleVertex *const *points, int size_u, int size_v, u32 vertType, const Spline::Weight2D &weights) override;
|
||||
void EndFrame(); // Queues textures for deletion.
|
||||
};
|
||||
|
||||
// Handles transform, lighting and drawing.
|
||||
class DrawEngineGLES : public DrawEngineCommon {
|
||||
public:
|
||||
@ -208,17 +225,5 @@ private:
|
||||
int bufferDecimationCounter_ = 0;
|
||||
|
||||
// Hardware tessellation
|
||||
// GLES tessellation data transfer helper, declared inside this header hunk with the
// raw-array SendDataToShader() signature.
// NOTE(review): this looks like the pre-refactor nested declaration shown by the diff
// (a second declaration with the SimpleVertex-based signature exists elsewhere) - confirm.
class TessellationDataTransferGLES : public TessellationDataTransfer {
|
||||
private:
|
||||
// One texture each for the data passed to SendDataToShader().
|
||||
// Null until created; released in EndFrame().
|
||||
GLRTexture *data_tex[3]{};
|
||||
// Render manager through which the textures are created and deleted.
|
||||
GLRenderManager *renderManager_;
|
||||
public:
|
||||
TessellationDataTransferGLES(GLRenderManager *renderManager)
|
||||
: renderManager_(renderManager) { }
|
||||
// Releases any textures still held by going through EndFrame().
|
||||
~TessellationDataTransferGLES() {
|
||||
EndFrame();
|
||||
}
|
||||
// Uploads `size` control points from the pos / tex / col float arrays.
|
||||
// hasColor and hasTexCoords tell the implementation which optional arrays are populated.
|
||||
void SendDataToShader(const float *pos, const float *tex, const float *col, int size, bool hasColor, bool hasTexCoords) override;
|
||||
void EndFrame() override; // Queues textures for deletion.
|
||||
};
|
||||
TessellationDataTransferGLES *tessDataTransferGLES;
|
||||
};
|
||||
|
@ -109,8 +109,7 @@ GPU_GLES::GPU_GLES(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
|
||||
if (g_Config.bHardwareTessellation) {
|
||||
// Disable hardware tessellation if device is unsupported.
|
||||
bool hasTexelFetch = gl_extensions.GLES3 || (!gl_extensions.IsGLES && gl_extensions.VersionGEThan(3, 3, 0)) || gl_extensions.EXT_gpu_shader4;
|
||||
if (!gstate_c.SupportsAll(GPU_SUPPORTS_INSTANCE_RENDERING | GPU_SUPPORTS_VERTEX_TEXTURE_FETCH | GPU_SUPPORTS_TEXTURE_FLOAT) || !hasTexelFetch) {
|
||||
// TODO: Check unsupported device name list.(Above gpu features are supported but it has issues with weak gpu, memory, shader compiler etc...)
|
||||
if (!gstate_c.SupportsAll(GPU_SUPPORTS_VERTEX_TEXTURE_FETCH | GPU_SUPPORTS_TEXTURE_FLOAT) || !hasTexelFetch) {
|
||||
g_Config.bHardwareTessellation = false;
|
||||
ERROR_LOG(G3D, "Hardware Tessellation is unsupported, falling back to software tessellation");
|
||||
I18NCategory *gr = GetI18NCategory("Graphics");
|
||||
|
@ -159,13 +159,10 @@ LinkedShader::LinkedShader(GLRenderManager *render, VShaderID VSID, Shader *vs,
|
||||
|
||||
// We need to fetch these unconditionally, gstate_c.spline or bezier will not be set if we
|
||||
// create this shader at load time from the shader cache.
|
||||
queries.push_back({ &u_tess_pos_tex, "u_tess_pos_tex" });
|
||||
queries.push_back({ &u_tess_tex_tex, "u_tess_tex_tex" });
|
||||
queries.push_back({ &u_tess_col_tex, "u_tess_col_tex" });
|
||||
queries.push_back({ &u_spline_count_u, "u_spline_count_u" });
|
||||
queries.push_back({ &u_spline_count_v, "u_spline_count_v" });
|
||||
queries.push_back({ &u_spline_type_u, "u_spline_type_u" });
|
||||
queries.push_back({ &u_spline_type_v, "u_spline_type_v" });
|
||||
queries.push_back({ &u_tess_points, "u_tess_points" });
|
||||
queries.push_back({ &u_tess_weights_u, "u_tess_weights_u" });
|
||||
queries.push_back({ &u_tess_weights_v, "u_tess_weights_v" });
|
||||
queries.push_back({ &u_spline_counts, "u_spline_counts" });
|
||||
queries.push_back({ &u_depal, "u_depal" });
|
||||
|
||||
attrMask = vs->GetAttrMask();
|
||||
@ -176,9 +173,9 @@ LinkedShader::LinkedShader(GLRenderManager *render, VShaderID VSID, Shader *vs,
|
||||
initialize.push_back({ &u_fbotex, 0, 1 });
|
||||
initialize.push_back({ &u_testtex, 0, 2 });
|
||||
initialize.push_back({ &u_pal, 0, 3 }); // CLUT
|
||||
initialize.push_back({ &u_tess_pos_tex, 0, 4 }); // Texture unit 4
|
||||
initialize.push_back({ &u_tess_tex_tex, 0, 5 }); // Texture unit 5
|
||||
initialize.push_back({ &u_tess_col_tex, 0, 6 }); // Texture unit 6
|
||||
initialize.push_back({ &u_tess_points, 0, 4 }); // Control Points
|
||||
initialize.push_back({ &u_tess_weights_u, 0, 5 });
|
||||
initialize.push_back({ &u_tess_weights_v, 0, 6 });
|
||||
|
||||
program = render->CreateProgram(shaders, semantics, queries, initialize, gstate_c.featureFlags & GPU_SUPPORTS_DUALSOURCE_BLEND);
|
||||
|
||||
@ -567,13 +564,9 @@ void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid) {
|
||||
}
|
||||
|
||||
if (dirty & DIRTY_BEZIERSPLINE) {
|
||||
render_->SetUniformI1(&u_spline_count_u, gstate_c.spline_count_u);
|
||||
if (u_spline_count_v != -1)
|
||||
render_->SetUniformI1(&u_spline_count_v, gstate_c.spline_count_v);
|
||||
if (u_spline_type_u != -1)
|
||||
render_->SetUniformI1(&u_spline_type_u, gstate_c.spline_type_u);
|
||||
if (u_spline_type_v != -1)
|
||||
render_->SetUniformI1(&u_spline_type_v, gstate_c.spline_type_v);
|
||||
if (u_spline_counts != -1) {
|
||||
render_->SetUniformI1(&u_spline_counts, gstate_c.spline_num_points_u);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -117,13 +117,11 @@ public:
|
||||
int u_lightspecular[4]; // attenuation
|
||||
int u_lightambient[4]; // attenuation
|
||||
|
||||
int u_tess_pos_tex;
|
||||
int u_tess_tex_tex;
|
||||
int u_tess_col_tex;
|
||||
int u_spline_count_u;
|
||||
int u_spline_count_v;
|
||||
int u_spline_type_u;
|
||||
int u_spline_type_v;
|
||||
// Spline Tessellation
|
||||
int u_tess_points; // Control Points
|
||||
int u_tess_weights_u;
|
||||
int u_tess_weights_v;
|
||||
int u_spline_counts;
|
||||
};
|
||||
|
||||
// Real public interface
|
||||
|
@ -193,6 +193,7 @@ void GenerateVertexShader(const VShaderID &id, char *buffer, uint32_t *attrMask,
|
||||
bool doSpline = id.Bit(VS_BIT_SPLINE);
|
||||
bool hasColorTess = id.Bit(VS_BIT_HAS_COLOR_TESS);
|
||||
bool hasTexcoordTess = id.Bit(VS_BIT_HAS_TEXCOORD_TESS);
|
||||
bool hasNormalTess = id.Bit(VS_BIT_HAS_NORMAL_TESS);
|
||||
bool flipNormalTess = id.Bit(VS_BIT_NORM_REVERSE_TESS);
|
||||
|
||||
const char *shading = "";
|
||||
@ -379,83 +380,88 @@ void GenerateVertexShader(const VShaderID &id, char *buffer, uint32_t *attrMask,
|
||||
if (doBezier || doSpline) {
|
||||
*uniformMask |= DIRTY_BEZIERSPLINE;
|
||||
|
||||
WRITE(p, "uniform sampler2D u_tess_pos_tex;\n");
|
||||
WRITE(p, "uniform sampler2D u_tess_tex_tex;\n");
|
||||
WRITE(p, "uniform sampler2D u_tess_col_tex;\n");
|
||||
WRITE(p, "uniform sampler2D u_tess_points;\n"); // Control Points
|
||||
WRITE(p, "uniform sampler2D u_tess_weights_u;\n");
|
||||
WRITE(p, "uniform sampler2D u_tess_weights_v;\n");
|
||||
|
||||
WRITE(p, "uniform int u_spline_count_u;\n");
|
||||
WRITE(p, "uniform int u_spline_counts;\n");
|
||||
|
||||
for (int i = 2; i <= 4; i++) {
|
||||
// Define 3 types vec2, vec3, vec4
|
||||
WRITE(p, "vec%d tess_sample(in vec%d points[16], in vec2 weights[4]) {\n", i, i);
|
||||
WRITE(p, "vec%d tess_sample(in vec%d points[16], mat4 weights) {\n", i, i);
|
||||
WRITE(p, " vec%d pos = vec%d(0.0);\n", i, i);
|
||||
WRITE(p, " for (int i = 0; i < 4; ++i) {\n");
|
||||
WRITE(p, " for (int j = 0; j < 4; ++j) {\n");
|
||||
WRITE(p, " float f = weights[j].x * weights[i].y;\n");
|
||||
WRITE(p, " if (f != 0.0)\n");
|
||||
WRITE(p, " pos = pos + f * points[i * 4 + j];\n");
|
||||
WRITE(p, " }\n");
|
||||
WRITE(p, " }\n");
|
||||
for (int v = 0; v < 4; ++v) {
|
||||
for (int u = 0; u < 4; ++u) {
|
||||
WRITE(p, " pos += weights[%i][%i] * points[%i];\n", v, u, v * 4 + u);
|
||||
}
|
||||
}
|
||||
WRITE(p, " return pos;\n");
|
||||
WRITE(p, "}\n");
|
||||
}
|
||||
if (doSpline) {
|
||||
WRITE(p, "uniform int u_spline_count_v;\n");
|
||||
WRITE(p, "uniform int u_spline_type_u;\n");
|
||||
WRITE(p, "uniform int u_spline_type_v;\n");
|
||||
|
||||
WRITE(p, "void spline_knot(ivec2 num_patches, ivec2 type, out vec2 knot[6], ivec2 patch_pos) {\n");
|
||||
WRITE(p, " for (int i = 0; i < 6; ++i) {\n");
|
||||
WRITE(p, " knot[i] = vec2(float(i + patch_pos.x - 2), float(i + patch_pos.y - 2));\n");
|
||||
WRITE(p, " }\n");
|
||||
WRITE(p, " if ((type.x & 1) != 0) {\n");
|
||||
WRITE(p, " if (patch_pos.x <= 2)\n");
|
||||
WRITE(p, " knot[0].x = 0.0;\n");
|
||||
WRITE(p, " if (patch_pos.x <= 1)\n");
|
||||
WRITE(p, " knot[1].x = 0.0;\n");
|
||||
WRITE(p, " }\n");
|
||||
WRITE(p, " if ((type.x & 2) != 0) {\n");
|
||||
WRITE(p, " if (patch_pos.x >= (num_patches.x - 2))\n");
|
||||
WRITE(p, " knot[5].x = float(num_patches.x);\n");
|
||||
WRITE(p, " if (patch_pos.x == (num_patches.x - 1))\n");
|
||||
WRITE(p, " knot[4].x = float(num_patches.x);\n");
|
||||
WRITE(p, " }\n");
|
||||
WRITE(p, " if ((type.y & 1) != 0) {\n");
|
||||
WRITE(p, " if (patch_pos.y <= 2)\n");
|
||||
WRITE(p, " knot[0].y = 0.0;\n");
|
||||
WRITE(p, " if (patch_pos.y <= 1)\n");
|
||||
WRITE(p, " knot[1].y = 0.0;\n");
|
||||
WRITE(p, " }\n");
|
||||
WRITE(p, " if ((type.y & 2) != 0) {\n");
|
||||
WRITE(p, " if (patch_pos.y >= (num_patches.y - 2))\n");
|
||||
WRITE(p, " knot[5].y = float(num_patches.y);\n");
|
||||
WRITE(p, " if (patch_pos.y == (num_patches.y - 1))\n");
|
||||
WRITE(p, " knot[4].y = float(num_patches.y);\n");
|
||||
WRITE(p, " }\n");
|
||||
WRITE(p, "}\n");
|
||||
|
||||
WRITE(p, "void spline_weight(vec2 t, in vec2 knot[6], out vec2 weights[4]) {\n");
|
||||
// TODO: Maybe compilers could be coaxed into vectorizing this code without the above explicitly...
|
||||
WRITE(p, " vec2 t0 = (t - knot[0]);\n");
|
||||
WRITE(p, " vec2 t1 = (t - knot[1]);\n");
|
||||
WRITE(p, " vec2 t2 = (t - knot[2]);\n");
|
||||
// TODO: All our knots are integers so we should be able to get rid of these divisions (How?)
|
||||
WRITE(p, " vec2 f30 = t0 / (knot[3] - knot[0]);\n");
|
||||
WRITE(p, " vec2 f41 = t1 / (knot[4] - knot[1]);\n");
|
||||
WRITE(p, " vec2 f52 = t2 / (knot[5] - knot[2]);\n");
|
||||
WRITE(p, " vec2 f31 = t1 / (knot[3] - knot[1]);\n");
|
||||
WRITE(p, " vec2 f42 = t2 / (knot[4] - knot[2]);\n");
|
||||
WRITE(p, " vec2 f32 = t2 / (knot[3] - knot[2]);\n");
|
||||
WRITE(p, " vec2 a = (1.0 - f30)*(1.0 - f31);\n");
|
||||
WRITE(p, " vec2 b = (f31*f41);\n");
|
||||
WRITE(p, " vec2 c = (1.0 - f41)*(1.0 - f42);\n");
|
||||
WRITE(p, " vec2 d = (f42*f52);\n");
|
||||
WRITE(p, " weights[0] = a - (a*f32);\n");
|
||||
WRITE(p, " weights[1] = vec2(1.0) - a - b + ((a + b + c - vec2(1.0))*f32);\n");
|
||||
WRITE(p, " weights[2] = b + ((vec2(1.0) - b - c - d)*f32);\n");
|
||||
WRITE(p, " weights[3] = d*f32;\n");
|
||||
if (!gl_extensions.VersionGEThan(3, 0, 0)) { // For glsl version 1.10
|
||||
WRITE(p, "mat4 outerProduct(vec4 u, vec4 v) {\n");
|
||||
WRITE(p, " return mat4(u * v[0], u * v[1], u * v[2], u * v[3]);\n");
|
||||
WRITE(p, "}\n");
|
||||
}
|
||||
|
||||
WRITE(p, "struct Tess {\n");
|
||||
WRITE(p, " vec3 pos;\n");
|
||||
if (doTexture)
|
||||
WRITE(p, " vec2 tex;\n");
|
||||
WRITE(p, " vec4 col;\n");
|
||||
if (hasNormalTess)
|
||||
WRITE(p, " vec3 nrm;\n");
|
||||
WRITE(p, "};\n");
|
||||
|
||||
WRITE(p, "void tessellate(out Tess tess) {\n");
|
||||
WRITE(p, " ivec2 point_pos = ivec2(position.z, normal.z)%s;\n", doBezier ? " * 3" : "");
|
||||
WRITE(p, " ivec2 weight_idx = ivec2(position.xy);\n");
|
||||
|
||||
// Load 4x4 control points
|
||||
WRITE(p, " vec3 _pos[16];\n");
|
||||
WRITE(p, " vec2 _tex[16];\n");
|
||||
WRITE(p, " vec4 _col[16];\n");
|
||||
WRITE(p, " int index_u, index_v;\n");
|
||||
for (int i = 0; i < 4; i++) {
|
||||
for (int j = 0; j < 4; j++) {
|
||||
WRITE(p, " index_u = (%i + point_pos.x);\n", j);
|
||||
WRITE(p, " index_v = (%i + point_pos.y);\n", i);
|
||||
WRITE(p, " _pos[%i] = %s(u_tess_points, ivec2(index_u, index_v), 0).xyz;\n", i * 4 + j, texelFetch);
|
||||
if (doTexture && hasTexcoordTess)
|
||||
WRITE(p, " _tex[%i] = %s(u_tess_points, ivec2(index_u + u_spline_counts, index_v), 0).xy;\n", i * 4 + j, texelFetch);
|
||||
if (hasColorTess)
|
||||
WRITE(p, " _col[%i] = %s(u_tess_points, ivec2(index_u + u_spline_counts * 2, index_v), 0).rgba;\n", i * 4 + j, texelFetch);
|
||||
}
|
||||
}
|
||||
|
||||
// Basis polynomials as weight coefficients
|
||||
WRITE(p, " vec4 basis_u = %s(u_tess_weights_u, %s, 0);\n", texelFetch, "ivec2(weight_idx.x * 2, 0)");
|
||||
WRITE(p, " vec4 basis_v = %s(u_tess_weights_v, %s, 0);\n", texelFetch, "ivec2(weight_idx.y * 2, 0)");
|
||||
WRITE(p, " mat4 basis = outerProduct(basis_u, basis_v);\n");
|
||||
|
||||
// Tessellate
|
||||
WRITE(p, " tess.pos = tess_sample(_pos, basis);\n");
|
||||
if (doTexture) {
|
||||
if (hasTexcoordTess)
|
||||
WRITE(p, " tess.tex = tess_sample(_tex, basis);\n");
|
||||
else
|
||||
WRITE(p, " tess.tex = normal.xy;\n");
|
||||
}
|
||||
if (hasColorTess)
|
||||
WRITE(p, " tess.col = tess_sample(_col, basis);\n");
|
||||
else
|
||||
WRITE(p, " tess.col = u_matambientalpha;\n");
|
||||
if (hasNormalTess) {
|
||||
// Derivatives as weight coefficients
|
||||
WRITE(p, " vec4 deriv_u = %s(u_tess_weights_u, %s, 0);\n", texelFetch, "ivec2(weight_idx.x * 2 + 1, 0)");
|
||||
WRITE(p, " vec4 deriv_v = %s(u_tess_weights_v, %s, 0);\n", texelFetch, "ivec2(weight_idx.y * 2 + 1, 0)");
|
||||
|
||||
WRITE(p, " vec3 du = tess_sample(_pos, outerProduct(deriv_u, basis_v));\n");
|
||||
WRITE(p, " vec3 dv = tess_sample(_pos, outerProduct(basis_u, deriv_v));\n");
|
||||
WRITE(p, " tess.nrm = normalize(cross(du, dv));\n");
|
||||
}
|
||||
WRITE(p, "}\n");
|
||||
}
|
||||
|
||||
WRITE(p, "void main() {\n");
|
||||
@ -494,101 +500,14 @@ void GenerateVertexShader(const VShaderID &id, char *buffer, uint32_t *attrMask,
|
||||
} else {
|
||||
// Step 1: World Transform / Skinning
|
||||
if (!enableBones) {
|
||||
// Hardware tessellation
|
||||
if (doBezier || doSpline) {
|
||||
WRITE(p, " vec3 _pos[16];\n");
|
||||
WRITE(p, " vec2 _tex[16];\n");
|
||||
WRITE(p, " vec4 _col[16];\n");
|
||||
WRITE(p, " int num_patches_u = %s;\n", doBezier ? "(u_spline_count_u - 1) / 3" : "u_spline_count_u - 3");
|
||||
WRITE(p, " int u = int(mod(float(gl_InstanceID), float(num_patches_u)));\n");
|
||||
WRITE(p, " int v = gl_InstanceID / num_patches_u;\n");
|
||||
WRITE(p, " ivec2 patch_pos = ivec2(u, v);\n");
|
||||
WRITE(p, " for (int i = 0; i < 4; i++) {\n");
|
||||
WRITE(p, " for (int j = 0; j < 4; j++) {\n");
|
||||
WRITE(p, " int index = (i + v%s) * u_spline_count_u + (j + u%s);\n", doBezier ? " * 3" : "", doBezier ? " * 3" : "");
|
||||
WRITE(p, " _pos[i * 4 + j] = %s(u_tess_pos_tex, ivec2(index, 0), 0).xyz;\n", texelFetch);
|
||||
if (doTexture && hasTexcoord && hasTexcoordTess)
|
||||
WRITE(p, " _tex[i * 4 + j] = %s(u_tess_tex_tex, ivec2(index, 0), 0).xy;\n", texelFetch);
|
||||
if (hasColor && hasColorTess)
|
||||
WRITE(p, " _col[i * 4 + j] = %s(u_tess_col_tex, ivec2(index, 0), 0).rgba;\n", texelFetch);
|
||||
WRITE(p, " }\n");
|
||||
WRITE(p, " }\n");
|
||||
WRITE(p, " vec2 tess_pos = position.xy;\n");
|
||||
WRITE(p, " vec2 weights[4];\n");
|
||||
if (doBezier) {
|
||||
// Bernstein 3D
|
||||
WRITE(p, " weights[0] = (1.0 - tess_pos) * (1.0 - tess_pos) * (1.0 - tess_pos);\n");
|
||||
WRITE(p, " weights[1] = 3.0 * tess_pos * (1.0 - tess_pos) * (1.0 - tess_pos);\n");
|
||||
WRITE(p, " weights[2] = 3.0 * tess_pos * tess_pos * (1.0 - tess_pos);\n");
|
||||
WRITE(p, " weights[3] = tess_pos * tess_pos * tess_pos;\n");
|
||||
} else { // Spline
|
||||
WRITE(p, " ivec2 spline_num_patches = ivec2(u_spline_count_u - 3, u_spline_count_v - 3);\n");
|
||||
WRITE(p, " ivec2 spline_type = ivec2(u_spline_type_u, u_spline_type_v);\n");
|
||||
WRITE(p, " vec2 knots[6];\n");
|
||||
WRITE(p, " spline_knot(spline_num_patches, spline_type, knots, patch_pos);\n");
|
||||
WRITE(p, " spline_weight(tess_pos + vec2(patch_pos), knots, weights);\n");
|
||||
}
|
||||
WRITE(p, " vec3 pos = tess_sample(_pos, weights);\n");
|
||||
if (doTexture && hasTexcoord) {
|
||||
if (hasTexcoordTess)
|
||||
WRITE(p, " vec2 tex = tess_sample(_tex, weights);\n");
|
||||
else
|
||||
WRITE(p, " vec2 tex = tess_pos + vec2(patch_pos);\n");
|
||||
}
|
||||
if (hasColor) {
|
||||
if (hasColorTess)
|
||||
WRITE(p, " vec4 col = tess_sample(_col, weights);\n");
|
||||
else
|
||||
WRITE(p, " vec4 col = %s(u_tess_col_tex, ivec2(0, 0), 0).rgba;\n", texelFetch);
|
||||
}
|
||||
if (hasNormal) {
|
||||
// A curved surface probably always needs its normal computed (rather than sampled from the control points).
|
||||
if (doBezier) {
|
||||
// Bernstein derivative
|
||||
WRITE(p, " vec2 bernderiv[4];\n");
|
||||
WRITE(p, " bernderiv[0] = -3.0 * (tess_pos - 1.0) * (tess_pos - 1.0); \n");
|
||||
WRITE(p, " bernderiv[1] = 9.0 * tess_pos * tess_pos - 12.0 * tess_pos + 3.0; \n");
|
||||
WRITE(p, " bernderiv[2] = 3.0 * (2.0 - 3.0 * tess_pos) * tess_pos; \n");
|
||||
WRITE(p, " bernderiv[3] = 3.0 * tess_pos * tess_pos; \n");
|
||||
// Hardware tessellation
|
||||
WRITE(p, " Tess tess;\n");
|
||||
WRITE(p, " tessellate(tess);\n");
|
||||
|
||||
WRITE(p, " vec2 bernderiv_u[4];\n");
|
||||
WRITE(p, " vec2 bernderiv_v[4];\n");
|
||||
WRITE(p, " for (int i = 0; i < 4; i++) {\n");
|
||||
WRITE(p, " bernderiv_u[i] = vec2(bernderiv[i].x, weights[i].y);\n");
|
||||
WRITE(p, " bernderiv_v[i] = vec2(weights[i].x, bernderiv[i].y);\n");
|
||||
WRITE(p, " }\n");
|
||||
|
||||
WRITE(p, " vec3 du = tess_sample(_pos, bernderiv_u);\n");
|
||||
WRITE(p, " vec3 dv = tess_sample(_pos, bernderiv_v);\n");
|
||||
} else { // Spline
|
||||
WRITE(p, " vec2 tess_next_u = vec2(normal.x, 0.0);\n");
|
||||
WRITE(p, " vec2 tess_next_v = vec2(0.0, normal.y);\n");
|
||||
// Right
|
||||
WRITE(p, " vec2 tess_pos_r = tess_pos + tess_next_u;\n");
|
||||
WRITE(p, " spline_weight(tess_pos_r + vec2(patch_pos), knots, weights);\n");
|
||||
WRITE(p, " vec3 pos_r = tess_sample(_pos, weights);\n");
|
||||
// Left
|
||||
WRITE(p, " vec2 tess_pos_l = tess_pos - tess_next_u;\n");
|
||||
WRITE(p, " spline_weight(tess_pos_l + vec2(patch_pos), knots, weights);\n");
|
||||
WRITE(p, " vec3 pos_l = tess_sample(_pos, weights);\n");
|
||||
// Down
|
||||
WRITE(p, " vec2 tess_pos_d = tess_pos + tess_next_v;\n");
|
||||
WRITE(p, " spline_weight(tess_pos_d + vec2(patch_pos), knots, weights);\n");
|
||||
WRITE(p, " vec3 pos_d = tess_sample(_pos, weights);\n");
|
||||
// Up
|
||||
WRITE(p, " vec2 tess_pos_u = tess_pos - tess_next_v;\n");
|
||||
WRITE(p, " spline_weight(tess_pos_u + vec2(patch_pos), knots, weights);\n");
|
||||
WRITE(p, " vec3 pos_u = tess_sample(_pos, weights);\n");
|
||||
|
||||
WRITE(p, " vec3 du = pos_r - pos_l;\n");
|
||||
WRITE(p, " vec3 dv = pos_d - pos_u;\n");
|
||||
}
|
||||
WRITE(p, " vec3 nrm = cross(du, dv);\n");
|
||||
WRITE(p, " nrm = normalize(nrm);\n");
|
||||
}
|
||||
WRITE(p, " vec3 worldpos = (u_world * vec4(pos.xyz, 1.0)).xyz;\n");
|
||||
if (hasNormal) {
|
||||
WRITE(p, " mediump vec3 worldnormal = normalize((u_world * vec4(%snrm, 0.0)).xyz);\n", flipNormalTess ? "-" : "");
|
||||
WRITE(p, " vec3 worldpos = (u_world * vec4(tess.pos.xyz, 1.0)).xyz;\n");
|
||||
if (hasNormalTess) {
|
||||
WRITE(p, " mediump vec3 worldnormal = normalize((u_world * vec4(%stess.nrm, 0.0)).xyz);\n", flipNormalTess ? "-" : "");
|
||||
} else {
|
||||
WRITE(p, " mediump vec3 worldnormal = vec3(0.0, 0.0, 1.0);\n");
|
||||
}
|
||||
@ -692,9 +611,10 @@ void GenerateVertexShader(const VShaderID &id, char *buffer, uint32_t *attrMask,
|
||||
const char *diffuseStr = (matUpdate & 2) && hasColor ? "color0.rgb" : "u_matdiffuse";
|
||||
const char *specularStr = (matUpdate & 4) && hasColor ? "color0.rgb" : "u_matspecular.rgb";
|
||||
if (doBezier || doSpline) {
|
||||
ambientStr = (matUpdate & 1) && hasColor ? "col" : "u_matambientalpha";
|
||||
diffuseStr = (matUpdate & 2) && hasColor ? "col.rgb" : "u_matdiffuse";
|
||||
specularStr = (matUpdate & 4) && hasColor ? "col.rgb" : "u_matspecular.rgb";
|
||||
// TODO: Probably, should use hasColorTess but FF4 has a problem with drawing the background.
|
||||
ambientStr = (matUpdate & 1) && hasColor ? "tess.col" : "u_matambientalpha";
|
||||
diffuseStr = (matUpdate & 2) && hasColor ? "tess.col.rgb" : "u_matdiffuse";
|
||||
specularStr = (matUpdate & 4) && hasColor ? "tess.col.rgb" : "u_matspecular.rgb";
|
||||
}
|
||||
|
||||
bool diffuseIsZero = true;
|
||||
@ -821,7 +741,7 @@ void GenerateVertexShader(const VShaderID &id, char *buffer, uint32_t *attrMask,
|
||||
// Lighting doesn't affect color.
|
||||
if (hasColor) {
|
||||
if (doBezier || doSpline)
|
||||
WRITE(p, " v_color0 = col;\n");
|
||||
WRITE(p, " v_color0 = tess.col;\n");
|
||||
else
|
||||
WRITE(p, " v_color0 = color0;\n");
|
||||
} else {
|
||||
@ -839,9 +759,7 @@ void GenerateVertexShader(const VShaderID &id, char *buffer, uint32_t *attrMask,
|
||||
if (scaleUV) {
|
||||
if (hasTexcoord) {
|
||||
if (doBezier || doSpline)
|
||||
// TODO: Need fix?
|
||||
// Fix to avoid temporarily texture animation bug with hardware tessellation.
|
||||
WRITE(p, " v_texcoord = vec3(tex * u_uvscaleoffset.xy + u_uvscaleoffset.zw, 0.0);\n");
|
||||
WRITE(p, " v_texcoord = vec3(tess.tex * u_uvscaleoffset.xy + u_uvscaleoffset.zw, 0.0);\n");
|
||||
else
|
||||
WRITE(p, " v_texcoord = vec3(texcoord.xy * u_uvscaleoffset.xy, 0.0);\n");
|
||||
} else {
|
||||
@ -849,10 +767,7 @@ void GenerateVertexShader(const VShaderID &id, char *buffer, uint32_t *attrMask,
|
||||
}
|
||||
} else {
|
||||
if (hasTexcoord) {
|
||||
if (doBezier || doSpline)
|
||||
WRITE(p, " v_texcoord = vec3(tex * u_uvscaleoffset.xy + u_uvscaleoffset.zw, 0.0);\n");
|
||||
else
|
||||
WRITE(p, " v_texcoord = vec3(texcoord.xy * u_uvscaleoffset.xy + u_uvscaleoffset.zw, 0.0);\n");
|
||||
WRITE(p, " v_texcoord = vec3(texcoord.xy * u_uvscaleoffset.xy + u_uvscaleoffset.zw, 0.0);\n");
|
||||
} else {
|
||||
WRITE(p, " v_texcoord = vec3(u_uvscaleoffset.zw, 0.0);\n");
|
||||
}
|
||||
|
@ -1718,8 +1718,6 @@ bail:
|
||||
}
|
||||
|
||||
void GPUCommon::Execute_Bezier(u32 op, u32 diff) {
|
||||
drawEngineCommon_->DispatchFlush();
|
||||
|
||||
// We don't dirty on normal changes anymore as we prescale, but it's needed for splines/bezier.
|
||||
gstate_c.Dirty(DIRTY_UVSCALEOFFSET);
|
||||
|
||||
@ -1760,9 +1758,9 @@ void GPUCommon::Execute_Bezier(u32 op, u32 diff) {
|
||||
if (CanUseHardwareTessellation(patchPrim)) {
|
||||
gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE);
|
||||
gstate_c.bezier = true;
|
||||
if (gstate_c.spline_count_u != bz_ucount) {
|
||||
if (gstate_c.spline_num_points_u != bz_ucount) {
|
||||
gstate_c.Dirty(DIRTY_BEZIERSPLINE);
|
||||
gstate_c.spline_count_u = bz_ucount;
|
||||
gstate_c.spline_num_points_u = bz_ucount;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1780,8 +1778,6 @@ void GPUCommon::Execute_Bezier(u32 op, u32 diff) {
|
||||
}
|
||||
|
||||
void GPUCommon::Execute_Spline(u32 op, u32 diff) {
|
||||
drawEngineCommon_->DispatchFlush();
|
||||
|
||||
// We don't dirty on normal changes anymore as we prescale, but it's needed for splines/bezier.
|
||||
gstate_c.Dirty(DIRTY_UVSCALEOFFSET);
|
||||
|
||||
@ -1824,14 +1820,9 @@ void GPUCommon::Execute_Spline(u32 op, u32 diff) {
|
||||
if (CanUseHardwareTessellation(patchPrim)) {
|
||||
gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE);
|
||||
gstate_c.spline = true;
|
||||
bool countsChanged = gstate_c.spline_count_u != sp_ucount || gstate_c.spline_count_v != sp_vcount;
|
||||
bool typesChanged = gstate_c.spline_type_u != sp_utype || gstate_c.spline_type_v != sp_vtype;
|
||||
if (countsChanged || typesChanged) {
|
||||
if (gstate_c.spline_num_points_u != sp_ucount) {
|
||||
gstate_c.Dirty(DIRTY_BEZIERSPLINE);
|
||||
gstate_c.spline_count_u = sp_ucount;
|
||||
gstate_c.spline_count_v = sp_vcount;
|
||||
gstate_c.spline_type_u = sp_utype;
|
||||
gstate_c.spline_type_v = sp_vtype;
|
||||
gstate_c.spline_num_points_u = sp_ucount;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -603,10 +603,7 @@ struct GPUStateCache {
|
||||
|
||||
bool bezier;
|
||||
bool spline;
|
||||
int spline_count_u;
|
||||
int spline_count_v;
|
||||
int spline_type_u;
|
||||
int spline_type_v;
|
||||
int spline_num_points_u;
|
||||
|
||||
bool useShaderDepal;
|
||||
GEBufferFormat depalFramebufferFormat;
|
||||
|
@ -102,11 +102,49 @@ float Vec3<float>::Distance2To(Vec3<float> &other)
|
||||
return Vec3<float>(other-(*this)).Length2();
|
||||
}
|
||||
|
||||
#if defined(_M_SSE)
|
||||
// Returns a vector whose four lanes all hold an approximation of
// 1 / sqrt(x*x + y*y + z*z), computed from lanes 0-2 of v. Lane 3 of v is ignored.
// The value comes from _mm_rsqrt_ss, so it is an estimate rather than an exact result.
__m128 SSENormalizeMultiplierSSE2(__m128 v)
{
	const __m128 squared = _mm_mul_ps(v, v);
	// Move y*y and z*z into lane 0; the upper lanes of these temporaries are never read.
	const __m128 ySquared = _mm_shuffle_ps(squared, squared, _MM_SHUFFLE(0, 0, 0, 1));
	const __m128 zSquared = _mm_shuffle_ps(squared, squared, _MM_SHUFFLE(0, 0, 0, 2));
	// Lane 0 becomes z*z + (y*y + x*x).
	const __m128 xySum = _mm_add_ss(ySquared, squared);
	const __m128 lengthSquared = _mm_add_ss(zSquared, xySum);

	const __m128 invLength = _mm_rsqrt_ss(lengthSquared);
	// Broadcast lane 0 to every lane.
	return _mm_shuffle_ps(invLength, invLength, _MM_SHUFFLE(0, 0, 0, 0));
}
|
||||
|
||||
#if _M_SSE >= 0x401
|
||||
// SSE4.1 variant: returns an approximation of 1 / sqrt(x*x + y*y + z*z) in all four lanes.
// The dot-product mask is 0x7F: the high nibble (0x7) multiplies only lanes 0-2, so whatever
// sits in lane 3 cannot leak into the sum (this matches SSENormalizeMultiplierSSE2, which also
// sums only three lanes); the low nibble (0xF) writes the sum to every output lane.
__m128 SSENormalizeMultiplierSSE4(__m128 v)
{
	return _mm_rsqrt_ps(_mm_dp_ps(v, v, 0x7F));
}
|
||||
|
||||
__m128 SSENormalizeMultiplier(bool useSSE4, __m128 v)
|
||||
{
|
||||
if (useSSE4)
|
||||
return SSENormalizeMultiplierSSE4(v);
|
||||
return SSENormalizeMultiplierSSE2(v);
|
||||
}
|
||||
#else
|
||||
__m128 SSENormalizeMultiplier(bool useSSE4, __m128 v)
|
||||
{
|
||||
return SSENormalizeMultiplierSSE2(v);
|
||||
}
|
||||
#endif
|
||||
template<>
|
||||
Vec3<float> Vec3<float>::Normalized() const
|
||||
Vec3<float> Vec3<float>::Normalized(bool useSSE4) const
|
||||
{
|
||||
const __m128 normalize = SSENormalizeMultiplier(useSSE4, vec);
|
||||
return _mm_mul_ps(normalize, vec);
|
||||
}
|
||||
#else
|
||||
template<>
|
||||
Vec3<float> Vec3<float>::Normalized(bool useSSE4) const
|
||||
{
|
||||
return (*this) / Length();
|
||||
}
|
||||
#endif
|
||||
|
||||
template<>
|
||||
float Vec3<float>::Normalize()
|
||||
|
71
GPU/Math3D.h
71
GPU/Math3D.h
@ -25,6 +25,9 @@
|
||||
|
||||
#if defined(_M_SSE)
|
||||
#include <emmintrin.h>
|
||||
#if _M_SSE >= 0x401
|
||||
#include <smmintrin.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
namespace Math3D {
|
||||
@ -177,8 +180,6 @@ public:
|
||||
const Vec2 ts() const { return Vec2(y, x); }
|
||||
};
|
||||
|
||||
typedef Vec2<float> Vec2f;
|
||||
|
||||
template<typename T>
|
||||
class Vec3Packed;
|
||||
|
||||
@ -295,7 +296,7 @@ public:
|
||||
void SetLength(const float l);
|
||||
Vec3 WithLength(const float l) const;
|
||||
float Distance2To(Vec3 &other);
|
||||
Vec3 Normalized() const;
|
||||
Vec3 Normalized(bool useSSE4 = false) const;
|
||||
float Normalize(); // returns the previous length, which is often useful
|
||||
|
||||
T& operator [] (int i) //allow vector[2] = 3 (vector.z=3)
|
||||
@ -817,6 +818,7 @@ private:
|
||||
|
||||
}; // namespace Math3D
|
||||
|
||||
typedef Math3D::Vec2<float> Vec2f;
|
||||
typedef Math3D::Vec3<float> Vec3f;
|
||||
typedef Math3D::Vec3Packed<float> Vec3Packedf;
|
||||
typedef Math3D::Vec4<float> Vec4f;
|
||||
@ -1082,6 +1084,69 @@ __forceinline void Vec4<T>::ToRGBA(u8 *rgba) const
|
||||
*(u32 *)rgba = ToRGBA();
|
||||
}
|
||||
|
||||
#if defined(_M_SSE)
|
||||
// Specialized for SIMD optimization
|
||||
|
||||
// Vec3<float> operation
|
||||
template<>
|
||||
inline void Vec3<float>::operator += (const Vec3<float> &other)
|
||||
{
|
||||
vec = _mm_add_ps(vec, other.vec);
|
||||
}
|
||||
|
||||
template<>
|
||||
inline Vec3<float> Vec3<float>::operator + (const Vec3 &other) const
|
||||
{
|
||||
return Vec3<float>(_mm_add_ps(vec, other.vec));
|
||||
}
|
||||
|
||||
template<>
|
||||
inline Vec3<float> Vec3<float>::operator * (const Vec3 &other) const
|
||||
{
|
||||
return Vec3<float>(_mm_mul_ps(vec, other.vec));
|
||||
}
|
||||
|
||||
template<> template<>
|
||||
inline Vec3<float> Vec3<float>::operator * (const float &other) const
|
||||
{
|
||||
return Vec3<float>(_mm_mul_ps(vec, _mm_set_ps1(other)));
|
||||
}
|
||||
|
||||
// Vec4<float> operation
|
||||
template<>
|
||||
inline void Vec4<float>::operator += (const Vec4<float> &other)
|
||||
{
|
||||
vec = _mm_add_ps(vec, other.vec);
|
||||
}
|
||||
|
||||
template<>
|
||||
inline Vec4<float> Vec4<float>::operator + (const Vec4 &other) const
|
||||
{
|
||||
return Vec4<float>(_mm_add_ps(vec, other.vec));
|
||||
}
|
||||
|
||||
template<>
|
||||
inline Vec4<float> Vec4<float>::operator * (const Vec4 &other) const
|
||||
{
|
||||
return Vec4<float>(_mm_mul_ps(vec, other.vec));
|
||||
}
|
||||
|
||||
template<> template<>
|
||||
inline Vec4<float> Vec4<float>::operator * (const float &other) const
|
||||
{
|
||||
return Vec4<float>(_mm_mul_ps(vec, _mm_set_ps1(other)));
|
||||
}
|
||||
|
||||
// Vec3<float> cross product
|
||||
template<>
|
||||
inline Vec3<float> Cross(const Vec3<float> &a, const Vec3<float> &b)
|
||||
{
|
||||
const __m128 left = _mm_mul_ps(_mm_shuffle_ps(a.vec, a.vec, _MM_SHUFFLE(3, 0, 2, 1)), _mm_shuffle_ps(b.vec, b.vec, _MM_SHUFFLE(3, 1, 0, 2)));
|
||||
const __m128 right = _mm_mul_ps(_mm_shuffle_ps(a.vec, a.vec, _MM_SHUFFLE(3, 1, 0, 2)), _mm_shuffle_ps(b.vec, b.vec, _MM_SHUFFLE(3, 0, 2, 1)));
|
||||
return _mm_sub_ps(left, right);
|
||||
}
|
||||
#endif
|
||||
|
||||
}; // namespace Math3D
|
||||
|
||||
// linear interpolation via float: 0.0=begin, 1.0=end
|
||||
|
@ -42,13 +42,11 @@ SoftwareDrawEngine::SoftwareDrawEngine() {
|
||||
// All this is a LOT of memory, need to see if we can cut down somehow. Used for splines.
|
||||
decoded = (u8 *)AllocateMemoryPages(DECODED_VERTEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
|
||||
decIndex = (u16 *)AllocateMemoryPages(DECODED_INDEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
|
||||
splineBuffer = (u8 *)AllocateMemoryPages(SPLINE_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
|
||||
}
|
||||
|
||||
// Destructor: hands the decoded-vertex, decoded-index and spline buffers back via
// FreeMemoryPages, each paired with its own *_BUFFER_SIZE constant.
SoftwareDrawEngine::~SoftwareDrawEngine() {
|
||||
FreeMemoryPages(decoded, DECODED_VERTEX_BUFFER_SIZE);
|
||||
FreeMemoryPages(decIndex, DECODED_INDEX_BUFFER_SIZE);
|
||||
FreeMemoryPages(splineBuffer, SPLINE_BUFFER_SIZE);
|
||||
}
|
||||
|
||||
void SoftwareDrawEngine::DispatchFlush() {
|
||||
@ -280,7 +278,7 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveTy
|
||||
|
||||
u16 index_lower_bound = 0;
|
||||
u16 index_upper_bound = vertex_count - 1;
|
||||
IndexConverter idxConv(vertex_type, indices);
|
||||
IndexConverter ConvertIndex(vertex_type, indices);
|
||||
|
||||
if (indices)
|
||||
GetIndexBounds(indices, vertex_count, vertex_type, &index_lower_bound, &index_upper_bound);
|
||||
@ -321,7 +319,7 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveTy
|
||||
{
|
||||
for (int vtx = 0; vtx < vertex_count; ++vtx) {
|
||||
if (indices) {
|
||||
vreader.Goto(idxConv.convert(vtx) - index_lower_bound);
|
||||
vreader.Goto(ConvertIndex(vtx) - index_lower_bound);
|
||||
} else {
|
||||
vreader.Goto(vtx);
|
||||
}
|
||||
@ -380,7 +378,7 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveTy
|
||||
int skip_count = data_index == 0 ? 1 : 0;
|
||||
for (int vtx = 0; vtx < vertex_count; ++vtx) {
|
||||
if (indices) {
|
||||
vreader.Goto(idxConv.convert(vtx) - index_lower_bound);
|
||||
vreader.Goto(ConvertIndex(vtx) - index_lower_bound);
|
||||
} else {
|
||||
vreader.Goto(vtx);
|
||||
}
|
||||
@ -410,7 +408,7 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveTy
|
||||
|
||||
for (int vtx = 0; vtx < vertex_count; ++vtx) {
|
||||
if (indices) {
|
||||
vreader.Goto(idxConv.convert(vtx) - index_lower_bound);
|
||||
vreader.Goto(ConvertIndex(vtx) - index_lower_bound);
|
||||
} else {
|
||||
vreader.Goto(vtx);
|
||||
}
|
||||
@ -452,7 +450,7 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveTy
|
||||
// Only read the central vertex if we're not continuing.
|
||||
if (data_index == 0) {
|
||||
if (indices) {
|
||||
vreader.Goto(idxConv.convert(0) - index_lower_bound);
|
||||
vreader.Goto(ConvertIndex(0) - index_lower_bound);
|
||||
} else {
|
||||
vreader.Goto(0);
|
||||
}
|
||||
@ -463,7 +461,7 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, GEPrimitiveTy
|
||||
|
||||
for (int vtx = start_vtx; vtx < vertex_count; ++vtx) {
|
||||
if (indices) {
|
||||
vreader.Goto(idxConv.convert(vtx) - index_lower_bound);
|
||||
vreader.Goto(ConvertIndex(vtx) - index_lower_bound);
|
||||
} else {
|
||||
vreader.Goto(vtx);
|
||||
}
|
||||
|
@ -69,6 +69,8 @@ enum {
|
||||
DRAW_BINDING_DYNUBO_LIGHT = 4,
|
||||
DRAW_BINDING_DYNUBO_BONE = 5,
|
||||
DRAW_BINDING_TESS_STORAGE_BUF = 6,
|
||||
DRAW_BINDING_TESS_STORAGE_BUF_WU = 7,
|
||||
DRAW_BINDING_TESS_STORAGE_BUF_WV = 8,
|
||||
};
|
||||
|
||||
enum {
|
||||
@ -87,7 +89,6 @@ DrawEngineVulkan::DrawEngineVulkan(VulkanContext *vulkan, Draw::DrawContext *dra
|
||||
// All this is a LOT of memory, need to see if we can cut down somehow.
|
||||
decoded = (u8 *)AllocateMemoryPages(DECODED_VERTEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
|
||||
decIndex = (u16 *)AllocateMemoryPages(DECODED_INDEX_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
|
||||
splineBuffer = (u8 *)AllocateMemoryPages(SPLINE_BUFFER_SIZE, MEM_PROT_READ | MEM_PROT_WRITE);
|
||||
|
||||
indexGen.Setup(decIndex);
|
||||
|
||||
@ -96,7 +97,7 @@ DrawEngineVulkan::DrawEngineVulkan(VulkanContext *vulkan, Draw::DrawContext *dra
|
||||
|
||||
void DrawEngineVulkan::InitDeviceObjects() {
|
||||
// All resources we need for PSP drawing. Usually only bindings 0 and 2-4 are populated.
|
||||
VkDescriptorSetLayoutBinding bindings[7]{};
|
||||
VkDescriptorSetLayoutBinding bindings[9]{};
|
||||
bindings[0].descriptorCount = 1;
|
||||
bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
|
||||
bindings[0].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
|
||||
@ -126,6 +127,14 @@ void DrawEngineVulkan::InitDeviceObjects() {
|
||||
bindings[6].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
||||
bindings[6].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
|
||||
bindings[6].binding = DRAW_BINDING_TESS_STORAGE_BUF;
|
||||
bindings[7].descriptorCount = 1;
|
||||
bindings[7].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
||||
bindings[7].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
|
||||
bindings[7].binding = DRAW_BINDING_TESS_STORAGE_BUF_WU;
|
||||
bindings[8].descriptorCount = 1;
|
||||
bindings[8].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
||||
bindings[8].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
|
||||
bindings[8].binding = DRAW_BINDING_TESS_STORAGE_BUF_WV;
|
||||
|
||||
VkDevice device = vulkan_->GetDevice();
|
||||
|
||||
@ -167,13 +176,13 @@ void DrawEngineVulkan::InitDeviceObjects() {
|
||||
|
||||
vertexCache_ = new VulkanPushBuffer(vulkan_, VERTEX_CACHE_SIZE);
|
||||
|
||||
tessDataTransfer = new TessellationDataTransferVulkan(vulkan_);
|
||||
tessDataTransferVulkan = new TessellationDataTransferVulkan(vulkan_);
|
||||
tessDataTransfer = tessDataTransferVulkan;
|
||||
}
|
||||
|
||||
// Destructor: hands the decoded-vertex, decoded-index and spline buffers back via
// FreeMemoryPages, then calls DestroyDeviceObjects().
DrawEngineVulkan::~DrawEngineVulkan() {
|
||||
FreeMemoryPages(decoded, DECODED_VERTEX_BUFFER_SIZE);
|
||||
FreeMemoryPages(decIndex, DECODED_INDEX_BUFFER_SIZE);
|
||||
FreeMemoryPages(splineBuffer, SPLINE_BUFFER_SIZE);
|
||||
|
||||
DestroyDeviceObjects();
|
||||
}
|
||||
@ -201,8 +210,8 @@ void DrawEngineVulkan::FrameData::Destroy(VulkanContext *vulkan) {
|
||||
}
|
||||
|
||||
void DrawEngineVulkan::DestroyDeviceObjects() {
|
||||
delete tessDataTransfer;
|
||||
tessDataTransfer = nullptr;
|
||||
delete tessDataTransferVulkan;
|
||||
tessDataTransfer = tessDataTransferVulkan = nullptr;
|
||||
|
||||
for (int i = 0; i < VulkanContext::MAX_INFLIGHT_FRAMES; i++) {
|
||||
frame_[i].Destroy(vulkan_);
|
||||
@ -258,7 +267,7 @@ void DrawEngineVulkan::BeginFrame() {
|
||||
frame->pushIndex->Begin(vulkan_);
|
||||
|
||||
// TODO: How can we make this nicer...
|
||||
((TessellationDataTransferVulkan *)tessDataTransfer)->SetPushBuffer(frame->pushUBO);
|
||||
tessDataTransferVulkan->SetPushBuffer(frame->pushUBO);
|
||||
|
||||
DirtyAllUBOs();
|
||||
|
||||
@ -470,23 +479,32 @@ VkDescriptorSet DrawEngineVulkan::GetOrCreateDescriptorSet(VkImageView imageView
|
||||
n++;
|
||||
}
|
||||
|
||||
// Tessellation data buffer. Make sure this is declared outside the if to avoid optimizer
|
||||
// shenanigans.
|
||||
VkDescriptorBufferInfo tess_buf{};
|
||||
// Tessellation data buffer.
|
||||
if (tess) {
|
||||
VkBuffer buf;
|
||||
VkDeviceSize offset;
|
||||
VkDeviceSize range;
|
||||
((TessellationDataTransferVulkan *)tessDataTransfer)->GetBufferAndOffset(&buf, &offset, &range);
|
||||
assert(buf);
|
||||
tess_buf.buffer = buf;
|
||||
tess_buf.offset = offset;
|
||||
tess_buf.range = range;
|
||||
tessOffset_ = offset;
|
||||
const VkDescriptorBufferInfo *bufInfo = tessDataTransferVulkan->GetBufferInfo();
|
||||
// Control Points
|
||||
writes[n].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
|
||||
writes[n].pNext = nullptr;
|
||||
writes[n].dstBinding = DRAW_BINDING_TESS_STORAGE_BUF;
|
||||
writes[n].pBufferInfo = &tess_buf;
|
||||
writes[n].pBufferInfo = &bufInfo[0];
|
||||
writes[n].descriptorCount = 1;
|
||||
writes[n].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
||||
writes[n].dstSet = desc;
|
||||
n++;
|
||||
// Weights U
|
||||
writes[n].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
|
||||
writes[n].pNext = nullptr;
|
||||
writes[n].dstBinding = DRAW_BINDING_TESS_STORAGE_BUF_WU;
|
||||
writes[n].pBufferInfo = &bufInfo[1];
|
||||
writes[n].descriptorCount = 1;
|
||||
writes[n].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
||||
writes[n].dstSet = desc;
|
||||
n++;
|
||||
// Weights V
|
||||
writes[n].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
|
||||
writes[n].pNext = nullptr;
|
||||
writes[n].dstBinding = DRAW_BINDING_TESS_STORAGE_BUF_WV;
|
||||
writes[n].pBufferInfo = &bufInfo[2];
|
||||
writes[n].descriptorCount = 1;
|
||||
writes[n].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
||||
writes[n].dstSet = desc;
|
||||
@ -825,8 +843,7 @@ void DrawEngineVulkan::DoFlush() {
|
||||
if (useElements) {
|
||||
if (!ibuf)
|
||||
ibOffset = (uint32_t)frame->pushIndex->Push(decIndex, sizeof(uint16_t) * indexGen.VertexCount(), &ibuf);
|
||||
int numInstances = tess ? numPatches : 1;
|
||||
renderManager->DrawIndexed(pipelineLayout_, ds, ARRAY_SIZE(dynamicUBOOffsets), dynamicUBOOffsets, vbuf, vbOffset, ibuf, ibOffset, vertexCount, numInstances, VK_INDEX_TYPE_UINT16);
|
||||
renderManager->DrawIndexed(pipelineLayout_, ds, ARRAY_SIZE(dynamicUBOOffsets), dynamicUBOOffsets, vbuf, vbOffset, ibuf, ibOffset, vertexCount, 1, VK_INDEX_TYPE_UINT16);
|
||||
} else {
|
||||
renderManager->Draw(pipelineLayout_, ds, ARRAY_SIZE(dynamicUBOOffsets), dynamicUBOOffsets, vbuf, vbOffset, vertexCount);
|
||||
}
|
||||
@ -994,16 +1011,7 @@ void DrawEngineVulkan::UpdateUBOs(FrameData *frame) {
|
||||
}
|
||||
}
|
||||
|
||||
DrawEngineVulkan::TessellationDataTransferVulkan::TessellationDataTransferVulkan(VulkanContext *vulkan)
|
||||
: TessellationDataTransfer(), vulkan_(vulkan) {
|
||||
}
|
||||
|
||||
DrawEngineVulkan::TessellationDataTransferVulkan::~TessellationDataTransferVulkan() {
|
||||
}
|
||||
|
||||
void DrawEngineVulkan::TessellationDataTransferVulkan::PrepareBuffers(float *&pos, float *&tex, float *&col, int &posStride, int &texStride, int &colStride, int size, bool hasColor, bool hasTexCoords) {
|
||||
colStride = 4;
|
||||
|
||||
void TessellationDataTransferVulkan::SendDataToShader(const SimpleVertex *const *points, int size_u, int size_v, u32 vertType, const Spline::Weight2D &weights) {
|
||||
// SSBOs that are not simply float1 or float2 need to be padded up to a float4 size. vec3 members
|
||||
// also need to be 16-byte aligned, hence the padding.
|
||||
struct TessData {
|
||||
@ -1012,18 +1020,28 @@ void DrawEngineVulkan::TessellationDataTransferVulkan::PrepareBuffers(float *&po
|
||||
float color[4];
|
||||
};
|
||||
|
||||
int size = size_u * size_v;
|
||||
|
||||
int ssboAlignment = vulkan_->GetPhysicalDeviceProperties(vulkan_->GetCurrentPhysicalDevice()).limits.minStorageBufferOffsetAlignment;
|
||||
uint8_t *data = (uint8_t *)push_->PushAligned(size * sizeof(TessData), &offset_, &buf_, ssboAlignment);
|
||||
range_ = size * sizeof(TessData);
|
||||
uint8_t *data = (uint8_t *)push_->PushAligned(size * sizeof(TessData), (uint32_t *)&bufInfo_[0].offset, &bufInfo_[0].buffer, ssboAlignment);
|
||||
bufInfo_[0].range = size * sizeof(TessData);
|
||||
|
||||
pos = (float *)(data);
|
||||
tex = (float *)(data + offsetof(TessData, uv));
|
||||
col = (float *)(data + offsetof(TessData, color));
|
||||
posStride = sizeof(TessData) / sizeof(float);
|
||||
colStride = hasColor ? (sizeof(TessData) / sizeof(float)) : 0;
|
||||
texStride = sizeof(TessData) / sizeof(float);
|
||||
}
|
||||
float *pos = (float *)(data);
|
||||
float *tex = (float *)(data + offsetof(TessData, uv));
|
||||
float *col = (float *)(data + offsetof(TessData, color));
|
||||
int stride = sizeof(TessData) / sizeof(float);
|
||||
|
||||
void DrawEngineVulkan::TessellationDataTransferVulkan::SendDataToShader(const float *pos, const float *tex, const float *col, int size, bool hasColor, bool hasTexCoords) {
|
||||
// Nothing to do here! The caller will write directly to the pushbuffer through the pointers it got through PrepareBuffers.
|
||||
CopyControlPoints(pos, tex, col, stride, stride, stride, points, size, vertType);
|
||||
|
||||
using Spline::Weight;
|
||||
|
||||
// Weights U
|
||||
data = (uint8_t *)push_->PushAligned(weights.size_u * sizeof(Weight), (uint32_t *)&bufInfo_[1].offset, &bufInfo_[1].buffer, ssboAlignment);
|
||||
memcpy(data, weights.u, weights.size_u * sizeof(Weight));
|
||||
bufInfo_[1].range = weights.size_u * sizeof(Weight);
|
||||
|
||||
// Weights V
|
||||
data = (uint8_t *)push_->PushAligned(weights.size_v * sizeof(Weight), (uint32_t *)&bufInfo_[2].offset, &bufInfo_[2].buffer, ssboAlignment);
|
||||
memcpy(data, weights.v, weights.size_v * sizeof(Weight));
|
||||
bufInfo_[2].range = weights.size_v * sizeof(Weight);
|
||||
}
|
||||
|
@ -117,6 +117,20 @@ public:
|
||||
|
||||
class VulkanRenderManager;
|
||||
|
||||
class TessellationDataTransferVulkan : public TessellationDataTransfer {
|
||||
public:
|
||||
TessellationDataTransferVulkan(VulkanContext *vulkan) : vulkan_(vulkan) {}
|
||||
|
||||
void SetPushBuffer(VulkanPushBuffer *push) { push_ = push; }
|
||||
// Send spline/bezier's control points and weights to vertex shader through structured shader buffer.
|
||||
void SendDataToShader(const SimpleVertex *const *points, int size_u, int size_v, u32 vertType, const Spline::Weight2D &weights) override;
|
||||
const VkDescriptorBufferInfo *GetBufferInfo() { return bufInfo_; }
|
||||
private:
|
||||
VulkanContext *vulkan_;
|
||||
VulkanPushBuffer *push_; // Updated each frame.
|
||||
VkDescriptorBufferInfo bufInfo_[3]{};
|
||||
};
|
||||
|
||||
// Handles transform, lighting and drawing.
|
||||
class DrawEngineVulkan : public DrawEngineCommon {
|
||||
public:
|
||||
@ -278,31 +292,5 @@ private:
|
||||
int tessOffset_ = 0;
|
||||
|
||||
// Hardware tessellation
|
||||
class TessellationDataTransferVulkan : public TessellationDataTransfer {
|
||||
public:
|
||||
TessellationDataTransferVulkan(VulkanContext *vulkan);
|
||||
~TessellationDataTransferVulkan();
|
||||
|
||||
void SetPushBuffer(VulkanPushBuffer *push) { push_ = push; }
|
||||
void SendDataToShader(const float *pos, const float *tex, const float *col, int size, bool hasColor, bool hasTexCoords) override;
|
||||
void PrepareBuffers(float *&pos, float *&tex, float *&col, int &posStride, int &texStride, int &colStride, int size, bool hasColor, bool hasTexCoords) override;
|
||||
|
||||
void GetBufferAndOffset(VkBuffer *buf, VkDeviceSize *offset, VkDeviceSize *range) {
|
||||
*buf = buf_;
|
||||
*offset = (VkDeviceSize)offset_;
|
||||
*range = (VkDeviceSize)range_;
|
||||
|
||||
buf_ = 0;
|
||||
offset_ = 0;
|
||||
range_ = 0;
|
||||
}
|
||||
|
||||
private:
|
||||
VulkanContext *vulkan_;
|
||||
VulkanPushBuffer *push_; // Updated each frame.
|
||||
|
||||
uint32_t offset_ = 0;
|
||||
uint32_t range_ = 0;
|
||||
VkBuffer buf_ = VK_NULL_HANDLE;
|
||||
};
|
||||
TessellationDataTransferVulkan *tessDataTransferVulkan;
|
||||
};
|
||||
|
@ -133,6 +133,7 @@ bool GenerateVulkanGLSLVertexShader(const VShaderID &id, char *buffer) {
|
||||
bool doSpline = id.Bit(VS_BIT_SPLINE);
|
||||
bool hasColorTess = id.Bit(VS_BIT_HAS_COLOR_TESS);
|
||||
bool hasTexcoordTess = id.Bit(VS_BIT_HAS_TEXCOORD_TESS);
|
||||
bool hasNormalTess = id.Bit(VS_BIT_HAS_NORMAL_TESS);
|
||||
bool flipNormalTess = id.Bit(VS_BIT_NORM_REVERSE_TESS);
|
||||
|
||||
WRITE(p, "\n");
|
||||
@ -219,78 +220,90 @@ bool GenerateVulkanGLSLVertexShader(const VShaderID &id, char *buffer) {
|
||||
WRITE(p, " vec4 pos;\n");
|
||||
WRITE(p, " vec4 uv;\n");
|
||||
WRITE(p, " vec4 color;\n");
|
||||
WRITE(p, "};");
|
||||
WRITE(p, "};\n");
|
||||
WRITE(p, "layout (std430, set = 0, binding = 6) readonly buffer s_tess_data {\n");
|
||||
WRITE(p, " TessData data[];");
|
||||
WRITE(p, " TessData data[];\n");
|
||||
WRITE(p, "} tess_data;\n");
|
||||
|
||||
WRITE(p, "layout (std430) struct TessWeight {\n");
|
||||
WRITE(p, " vec4 basis;\n");
|
||||
WRITE(p, " vec4 deriv;\n");
|
||||
WRITE(p, "};\n");
|
||||
WRITE(p, "layout (std430, set = 0, binding = 7) readonly buffer s_tess_weights_u {\n");
|
||||
WRITE(p, " TessWeight data[];\n");
|
||||
WRITE(p, "} tess_weights_u;\n");
|
||||
WRITE(p, "layout (std430, set = 0, binding = 8) readonly buffer s_tess_weights_v {\n");
|
||||
WRITE(p, " TessWeight data[];\n");
|
||||
WRITE(p, "} tess_weights_v;\n");
|
||||
|
||||
for (int i = 2; i <= 4; i++) {
|
||||
// Define 3 types vec2, vec3, vec4
|
||||
WRITE(p, "vec%d tess_sample(in vec%d points[16], in vec2 weights[4]) {\n", i, i);
|
||||
WRITE(p, " vec%d pos = vec%d(0);\n", i, i);
|
||||
WRITE(p, " for (int i = 0; i < 4; ++i) {\n");
|
||||
WRITE(p, " for (int j = 0; j < 4; ++j) {\n");
|
||||
WRITE(p, " float f = weights[j].x * weights[i].y;\n");
|
||||
WRITE(p, " if (f != 0)\n");
|
||||
WRITE(p, " pos = pos + f * points[i * 4 + j];\n");
|
||||
WRITE(p, " }\n");
|
||||
WRITE(p, " }\n");
|
||||
WRITE(p, "vec%d tess_sample(in vec%d points[16], mat4 weights) {\n", i, i);
|
||||
WRITE(p, " vec%d pos = vec%d(0.0);\n", i, i);
|
||||
for (int v = 0; v < 4; ++v) {
|
||||
for (int u = 0; u < 4; ++u) {
|
||||
WRITE(p, " pos += weights[%i][%i] * points[%i];\n", v, u, v * 4 + u);
|
||||
}
|
||||
}
|
||||
WRITE(p, " return pos;\n");
|
||||
WRITE(p, "}\n");
|
||||
}
|
||||
if (doSpline) {
|
||||
WRITE(p, "void spline_knot(ivec2 num_patches, ivec2 type, out vec2 knot[6], ivec2 patch_pos) {\n");
|
||||
WRITE(p, " for (int i = 0; i < 6; ++i) {\n");
|
||||
WRITE(p, " knot[i] = vec2(i + patch_pos.x - 2, i + patch_pos.y - 2);\n");
|
||||
WRITE(p, " }\n");
|
||||
WRITE(p, " if ((type.x & 1) != 0) {\n");
|
||||
WRITE(p, " if (patch_pos.x <= 2)\n");
|
||||
WRITE(p, " knot[0].x = 0;\n");
|
||||
WRITE(p, " if (patch_pos.x <= 1)\n");
|
||||
WRITE(p, " knot[1].x = 0;\n");
|
||||
WRITE(p, " }\n");
|
||||
WRITE(p, " if ((type.x & 2) != 0) {\n");
|
||||
WRITE(p, " if (patch_pos.x >= (num_patches.x - 2))\n");
|
||||
WRITE(p, " knot[5].x = num_patches.x;\n");
|
||||
WRITE(p, " if (patch_pos.x == (num_patches.x - 1))\n");
|
||||
WRITE(p, " knot[4].x = num_patches.x;\n");
|
||||
WRITE(p, " }\n");
|
||||
WRITE(p, " if ((type.y & 1) != 0) {\n");
|
||||
WRITE(p, " if (patch_pos.y <= 2)\n");
|
||||
WRITE(p, " knot[0].y = 0;\n");
|
||||
WRITE(p, " if (patch_pos.y <= 1)\n");
|
||||
WRITE(p, " knot[1].y = 0;\n");
|
||||
WRITE(p, " }\n");
|
||||
WRITE(p, " if ((type.y & 2) != 0) {\n");
|
||||
WRITE(p, " if (patch_pos.y >= (num_patches.y - 2))\n");
|
||||
WRITE(p, " knot[5].y = num_patches.y;\n");
|
||||
WRITE(p, " if (patch_pos.y == (num_patches.y - 1))\n");
|
||||
WRITE(p, " knot[4].y = num_patches.y;\n");
|
||||
WRITE(p, " }\n");
|
||||
WRITE(p, "}\n");
|
||||
|
||||
WRITE(p, "void spline_weight(vec2 t, in vec2 knot[6], out vec2 weights[4]) {\n");
|
||||
// TODO: Maybe compilers could be coaxed into vectorizing this code without the above explicitly...
|
||||
WRITE(p, " vec2 t0 = (t - knot[0]);\n");
|
||||
WRITE(p, " vec2 t1 = (t - knot[1]);\n");
|
||||
WRITE(p, " vec2 t2 = (t - knot[2]);\n");
|
||||
// TODO: All our knots are integers so we should be able to get rid of these divisions (How?)
|
||||
WRITE(p, " vec2 f30 = t0 / (knot[3] - knot[0]);\n");
|
||||
WRITE(p, " vec2 f41 = t1 / (knot[4] - knot[1]);\n");
|
||||
WRITE(p, " vec2 f52 = t2 / (knot[5] - knot[2]);\n");
|
||||
WRITE(p, " vec2 f31 = t1 / (knot[3] - knot[1]);\n");
|
||||
WRITE(p, " vec2 f42 = t2 / (knot[4] - knot[2]);\n");
|
||||
WRITE(p, " vec2 f32 = t2 / (knot[3] - knot[2]);\n");
|
||||
WRITE(p, " vec2 a = (1 - f30)*(1 - f31);\n");
|
||||
WRITE(p, " vec2 b = (f31*f41);\n");
|
||||
WRITE(p, " vec2 c = (1 - f41)*(1 - f42);\n");
|
||||
WRITE(p, " vec2 d = (f42*f52);\n");
|
||||
WRITE(p, " weights[0] = a - (a*f32);\n");
|
||||
WRITE(p, " weights[1] = 1 - a - b + ((a + b + c - 1)*f32);\n");
|
||||
WRITE(p, " weights[2] = b + ((1 - b - c - d)*f32);\n");
|
||||
WRITE(p, " weights[3] = d*f32;\n");
|
||||
WRITE(p, "}\n");
|
||||
WRITE(p, "struct Tess {\n");
|
||||
WRITE(p, " vec3 pos;\n");
|
||||
if (doTexture)
|
||||
WRITE(p, " vec2 tex;\n");
|
||||
WRITE(p, " vec4 col;\n");
|
||||
if (hasNormalTess)
|
||||
WRITE(p, " vec3 nrm;\n");
|
||||
WRITE(p, "};\n");
|
||||
|
||||
WRITE(p, "void tessellate(out Tess tess) {\n");
|
||||
WRITE(p, " ivec2 point_pos = ivec2(position.z, normal.z)%s;\n", doBezier ? " * 3" : "");
|
||||
WRITE(p, " ivec2 weight_idx = ivec2(position.xy);\n");
|
||||
// Load 4x4 control points
|
||||
WRITE(p, " vec3 _pos[16];\n");
|
||||
WRITE(p, " vec2 _tex[16];\n");
|
||||
WRITE(p, " vec4 _col[16];\n");
|
||||
WRITE(p, " int index;\n");
|
||||
for (int i = 0; i < 4; i++) {
|
||||
for (int j = 0; j < 4; j++) {
|
||||
WRITE(p, " index = (%i + point_pos.y) * int(base.spline_counts) + (%i + point_pos.x);\n", i, j);
|
||||
WRITE(p, " _pos[%i] = tess_data.data[index].pos.xyz;\n", i * 4 + j);
|
||||
if (doTexture && hasTexcoordTess)
|
||||
WRITE(p, " _tex[%i] = tess_data.data[index].uv.xy;\n", i * 4 + j);
|
||||
if (hasColorTess)
|
||||
WRITE(p, " _col[%i] = tess_data.data[index].color;\n", i * 4 + j);
|
||||
}
|
||||
}
|
||||
|
||||
// Basis polynomials as weight coefficients
|
||||
WRITE(p, " vec4 basis_u = tess_weights_u.data[weight_idx.x].basis;\n");
|
||||
WRITE(p, " vec4 basis_v = tess_weights_v.data[weight_idx.y].basis;\n");
|
||||
WRITE(p, " mat4 basis = outerProduct(basis_u, basis_v);\n");
|
||||
|
||||
// Tessellate
|
||||
WRITE(p, " tess.pos = tess_sample(_pos, basis);\n");
|
||||
if (doTexture) {
|
||||
if (hasTexcoordTess)
|
||||
WRITE(p, " tess.tex = tess_sample(_tex, basis);\n");
|
||||
else
|
||||
WRITE(p, " tess.tex = normal.xy;\n");
|
||||
}
|
||||
if (hasColorTess)
|
||||
WRITE(p, " tess.col = tess_sample(_col, basis);\n");
|
||||
else
|
||||
WRITE(p, " tess.col = base.matambientalpha;\n");
|
||||
if (hasNormalTess) {
|
||||
// Derivatives as weight coefficients
|
||||
WRITE(p, " vec4 deriv_u = tess_weights_u.data[weight_idx.x].deriv;\n");
|
||||
WRITE(p, " vec4 deriv_v = tess_weights_v.data[weight_idx.y].deriv;\n");
|
||||
|
||||
WRITE(p, " vec3 du = tess_sample(_pos, outerProduct(deriv_u, basis_v));\n");
|
||||
WRITE(p, " vec3 dv = tess_sample(_pos, outerProduct(basis_u, deriv_v));\n");
|
||||
WRITE(p, " tess.nrm = normalize(cross(du, dv));\n");
|
||||
}
|
||||
WRITE(p, "}\n");
|
||||
}
|
||||
|
||||
WRITE(p, "void main() {\n");
|
||||
@ -330,103 +343,13 @@ bool GenerateVulkanGLSLVertexShader(const VShaderID &id, char *buffer) {
|
||||
// Step 1: World Transform / Skinning
|
||||
if (!enableBones) {
|
||||
if (doBezier || doSpline) {
|
||||
WRITE(p, " vec3 _pos[16];\n");
|
||||
WRITE(p, " vec2 _tex[16];\n");
|
||||
WRITE(p, " vec4 _col[16];\n");
|
||||
WRITE(p, " int spline_count_u = int(base.spline_counts & 0xff);\n");
|
||||
WRITE(p, " int spline_count_v = int((base.spline_counts >> 8) & 0xff);\n");
|
||||
WRITE(p, " int num_patches_u = %s;\n", doBezier ? "(spline_count_u - 1) / 3" : "spline_count_u - 3");
|
||||
WRITE(p, " int u = int(mod(gl_InstanceIndex, num_patches_u));\n");
|
||||
WRITE(p, " int v = gl_InstanceIndex / num_patches_u;\n");
|
||||
WRITE(p, " ivec2 patch_pos = ivec2(u, v);\n");
|
||||
WRITE(p, " for (int i = 0; i < 4; i++) {\n");
|
||||
WRITE(p, " for (int j = 0; j < 4; j++) {\n");
|
||||
WRITE(p, " int idx = (i + v%s) * spline_count_u + (j + u%s);\n", doBezier ? " * 3" : "", doBezier ? " * 3" : "");
|
||||
WRITE(p, " _pos[i * 4 + j] = tess_data.data[idx].pos.xyz;\n");
|
||||
if (doTexture && hasTexcoord && hasTexcoordTess)
|
||||
WRITE(p, " _tex[i * 4 + j] = tess_data.data[idx].uv.xy;\n");
|
||||
if (hasColor && hasColorTess)
|
||||
WRITE(p, " _col[i * 4 + j] = tess_data.data[idx].color;\n");
|
||||
WRITE(p, " }\n");
|
||||
WRITE(p, " }\n");
|
||||
WRITE(p, " vec2 tess_pos = position.xy;\n");
|
||||
WRITE(p, " vec2 weights[4];\n");
|
||||
if (doBezier) {
|
||||
// Bernstein 3D
|
||||
WRITE(p, " weights[0] = (1 - tess_pos) * (1 - tess_pos) * (1 - tess_pos);\n");
|
||||
WRITE(p, " weights[1] = 3 * tess_pos * (1 - tess_pos) * (1 - tess_pos);\n");
|
||||
WRITE(p, " weights[2] = 3 * tess_pos * tess_pos * (1 - tess_pos);\n");
|
||||
WRITE(p, " weights[3] = tess_pos * tess_pos * tess_pos;\n");
|
||||
} else { // Spline
|
||||
WRITE(p, " ivec2 spline_num_patches = ivec2(spline_count_u - 3, spline_count_v - 3);\n");
|
||||
WRITE(p, " int spline_type_u = int((base.spline_counts >> 16) & 0xff);\n");
|
||||
WRITE(p, " int spline_type_v = int((base.spline_counts >> 24) & 0xff);\n");
|
||||
WRITE(p, " ivec2 spline_type = ivec2(spline_type_u, spline_type_v);\n");
|
||||
WRITE(p, " vec2 knots[6];\n");
|
||||
WRITE(p, " spline_knot(spline_num_patches, spline_type, knots, patch_pos);\n");
|
||||
WRITE(p, " spline_weight(tess_pos + patch_pos, knots, weights);\n");
|
||||
}
|
||||
WRITE(p, " vec3 pos = tess_sample(_pos, weights);\n");
|
||||
if (doTexture && hasTexcoord) {
|
||||
if (hasTexcoordTess)
|
||||
WRITE(p, " vec2 tex = tess_sample(_tex, weights);\n");
|
||||
else
|
||||
WRITE(p, " vec2 tex = tess_pos + patch_pos;\n");
|
||||
}
|
||||
if (hasColor) {
|
||||
if (hasColorTess)
|
||||
WRITE(p, " vec4 col = tess_sample(_col, weights);\n");
|
||||
else
|
||||
WRITE(p, " vec4 col = tess_data.data[0].color;\n");
|
||||
}
|
||||
if (hasNormal) {
|
||||
// Curved surface is probably always need to compute normal(not sampling from control points)
|
||||
if (doBezier) {
|
||||
// Bernstein derivative
|
||||
WRITE(p, " vec2 bernderiv[4];\n");
|
||||
WRITE(p, " bernderiv[0] = -3 * (tess_pos - 1) * (tess_pos - 1); \n");
|
||||
WRITE(p, " bernderiv[1] = 9 * tess_pos * tess_pos - 12 * tess_pos + 3; \n");
|
||||
WRITE(p, " bernderiv[2] = 3 * (2 - 3 * tess_pos) * tess_pos; \n");
|
||||
WRITE(p, " bernderiv[3] = 3 * tess_pos * tess_pos; \n");
|
||||
// Hardware tessellation
|
||||
WRITE(p, " Tess tess;\n");
|
||||
WRITE(p, " tessellate(tess);\n");
|
||||
|
||||
WRITE(p, " vec2 bernderiv_u[4];\n");
|
||||
WRITE(p, " vec2 bernderiv_v[4];\n");
|
||||
WRITE(p, " for (int i = 0; i < 4; i++) {\n");
|
||||
WRITE(p, " bernderiv_u[i] = vec2(bernderiv[i].x, weights[i].y);\n");
|
||||
WRITE(p, " bernderiv_v[i] = vec2(weights[i].x, bernderiv[i].y);\n");
|
||||
WRITE(p, " }\n");
|
||||
|
||||
WRITE(p, " vec3 du = tess_sample(_pos, bernderiv_u);\n");
|
||||
WRITE(p, " vec3 dv = tess_sample(_pos, bernderiv_v);\n");
|
||||
} else { // Spline
|
||||
WRITE(p, " vec2 tess_next_u = vec2(normal.x, 0);\n");
|
||||
WRITE(p, " vec2 tess_next_v = vec2(0, normal.y);\n");
|
||||
// Right
|
||||
WRITE(p, " vec2 tess_pos_r = tess_pos + tess_next_u;\n");
|
||||
WRITE(p, " spline_weight(tess_pos_r + patch_pos, knots, weights);\n");
|
||||
WRITE(p, " vec3 pos_r = tess_sample(_pos, weights);\n");
|
||||
// Left
|
||||
WRITE(p, " vec2 tess_pos_l = tess_pos - tess_next_u;\n");
|
||||
WRITE(p, " spline_weight(tess_pos_l + patch_pos, knots, weights);\n");
|
||||
WRITE(p, " vec3 pos_l = tess_sample(_pos, weights);\n");
|
||||
// Down
|
||||
WRITE(p, " vec2 tess_pos_d = tess_pos + tess_next_v;\n");
|
||||
WRITE(p, " spline_weight(tess_pos_d + patch_pos, knots, weights);\n");
|
||||
WRITE(p, " vec3 pos_d = tess_sample(_pos, weights);\n");
|
||||
// Up
|
||||
WRITE(p, " vec2 tess_pos_u = tess_pos - tess_next_v;\n");
|
||||
WRITE(p, " spline_weight(tess_pos_u + patch_pos, knots, weights);\n");
|
||||
WRITE(p, " vec3 pos_u = tess_sample(_pos, weights);\n");
|
||||
|
||||
WRITE(p, " vec3 du = pos_r - pos_l;\n");
|
||||
WRITE(p, " vec3 dv = pos_d - pos_u;\n");
|
||||
}
|
||||
WRITE(p, " vec3 nrm = cross(du, dv);\n");
|
||||
WRITE(p, " nrm = normalize(nrm);\n");
|
||||
}
|
||||
WRITE(p, " vec3 worldpos = vec4(pos.xyz, 1.0) * base.world_mtx;\n");
|
||||
if (hasNormal) {
|
||||
WRITE(p, " mediump vec3 worldnormal = normalize(vec4(%snrm, 0.0) * base.world_mtx);\n", flipNormalTess ? "-" : "");
|
||||
WRITE(p, " vec3 worldpos = vec4(tess.pos.xyz, 1.0) * base.world_mtx;\n");
|
||||
if (hasNormalTess) {
|
||||
WRITE(p, " mediump vec3 worldnormal = normalize(vec4(%stess.nrm, 0.0) * base.world_mtx);\n", flipNormalTess ? "-" : "");
|
||||
} else {
|
||||
WRITE(p, " mediump vec3 worldnormal = vec3(0.0, 0.0, 1.0);\n");
|
||||
}
|
||||
@ -483,9 +406,10 @@ bool GenerateVulkanGLSLVertexShader(const VShaderID &id, char *buffer) {
|
||||
const char *diffuseStr = ((matUpdate & 2) && hasColor) ? "color0.rgb" : "light.matdiffuse";
|
||||
const char *specularStr = ((matUpdate & 4) && hasColor) ? "color0.rgb" : "light.matspecular.rgb";
|
||||
if (doBezier || doSpline) {
|
||||
ambientStr = (matUpdate & 1) && hasColor ? "col" : "base.matambientalpha";
|
||||
diffuseStr = (matUpdate & 2) && hasColor ? "col.rgb" : "light.matdiffuse";
|
||||
specularStr = (matUpdate & 4) && hasColor ? "col.rgb" : "light.matspecular.rgb";
|
||||
// TODO: Probably, should use hasColorTess but FF4 has a problem with drawing the background.
|
||||
ambientStr = (matUpdate & 1) && hasColor ? "tess.col" : "base.matambientalpha";
|
||||
diffuseStr = (matUpdate & 2) && hasColor ? "tess.col.rgb" : "light.matdiffuse";
|
||||
specularStr = (matUpdate & 4) && hasColor ? "tess.col.rgb" : "light.matspecular.rgb";
|
||||
}
|
||||
|
||||
bool diffuseIsZero = true;
|
||||
@ -606,7 +530,7 @@ bool GenerateVulkanGLSLVertexShader(const VShaderID &id, char *buffer) {
|
||||
// Lighting doesn't affect color.
|
||||
if (hasColor) {
|
||||
if (doBezier || doSpline)
|
||||
WRITE(p, " v_color0 = col;\n");
|
||||
WRITE(p, " v_color0 = tess.col;\n");
|
||||
else
|
||||
WRITE(p, " v_color0 = color0;\n");
|
||||
} else {
|
||||
@ -627,7 +551,7 @@ bool GenerateVulkanGLSLVertexShader(const VShaderID &id, char *buffer) {
|
||||
if (scaleUV) {
|
||||
if (hasTexcoord) {
|
||||
if (doBezier || doSpline)
|
||||
WRITE(p, " v_texcoord = vec3(tex.xy * base.uvscaleoffset.xy + base.uvscaleoffset.zw, 0.0);\n");
|
||||
WRITE(p, " v_texcoord = vec3(tess.tex.xy * base.uvscaleoffset.xy + base.uvscaleoffset.zw, 0.0);\n");
|
||||
else
|
||||
WRITE(p, " v_texcoord = vec3(texcoord.xy * base.uvscaleoffset.xy, 0.0);\n");
|
||||
} else {
|
||||
@ -635,10 +559,7 @@ bool GenerateVulkanGLSLVertexShader(const VShaderID &id, char *buffer) {
|
||||
}
|
||||
} else {
|
||||
if (hasTexcoord) {
|
||||
if (doBezier || doSpline)
|
||||
WRITE(p, " v_texcoord = vec3(tex.xy * base.uvscaleoffset.xy + base.uvscaleoffset.zw, 0.0);\n");
|
||||
else
|
||||
WRITE(p, " v_texcoord = vec3(texcoord.xy * base.uvscaleoffset.xy + base.uvscaleoffset.zw, 0.0);\n");
|
||||
WRITE(p, " v_texcoord = vec3(texcoord.xy * base.uvscaleoffset.xy + base.uvscaleoffset.zw, 0.0);\n");
|
||||
} else {
|
||||
WRITE(p, " v_texcoord = vec3(base.uvscaleoffset.zw, 0.0);\n");
|
||||
}
|
||||
|
@ -80,7 +80,7 @@ bool GameSettingsScreen::UseVerticalLayout() const {
|
||||
|
||||
// This needs before run CheckGPUFeatures()
|
||||
// TODO: Remove this if fix the issue
|
||||
bool CheckSupportInstancedTessellationGLES() {
|
||||
bool CheckSupportShaderTessellationGLES() {
|
||||
#if PPSSPP_PLATFORM(UWP)
|
||||
return true;
|
||||
#else
|
||||
@ -88,21 +88,17 @@ bool CheckSupportInstancedTessellationGLES() {
|
||||
int maxVertexTextureImageUnits = gl_extensions.maxVertexTextureUnits;
|
||||
bool vertexTexture = maxVertexTextureImageUnits >= 3; // At least 3 for hardware tessellation
|
||||
|
||||
bool canUseInstanceID = gl_extensions.EXT_draw_instanced || gl_extensions.ARB_draw_instanced;
|
||||
bool canDefInstanceID = gl_extensions.IsGLES || gl_extensions.EXT_gpu_shader4 || gl_extensions.VersionGEThan(3, 1);
|
||||
bool instanceRendering = gl_extensions.GLES3 || (canUseInstanceID && canDefInstanceID);
|
||||
|
||||
bool textureFloat = gl_extensions.ARB_texture_float || gl_extensions.OES_texture_float;
|
||||
bool hasTexelFetch = gl_extensions.GLES3 || (!gl_extensions.IsGLES && gl_extensions.VersionGEThan(3, 3, 0)) || gl_extensions.EXT_gpu_shader4;
|
||||
|
||||
return instanceRendering && vertexTexture && textureFloat && hasTexelFetch;
|
||||
return vertexTexture && textureFloat && hasTexelFetch;
|
||||
#endif
|
||||
}
|
||||
|
||||
bool DoesBackendSupportHWTess() {
|
||||
switch (GetGPUBackend()) {
|
||||
case GPUBackend::OPENGL:
|
||||
return CheckSupportInstancedTessellationGLES();
|
||||
return CheckSupportShaderTessellationGLES();
|
||||
case GPUBackend::VULKAN:
|
||||
case GPUBackend::DIRECT3D11:
|
||||
return true;
|
||||
@ -392,11 +388,10 @@ void GameSettingsScreen::CreateViews() {
|
||||
}
|
||||
return UI::EVENT_CONTINUE;
|
||||
});
|
||||
beziersChoice->SetDisabledPtr(&g_Config.bHardwareTessellation);
|
||||
|
||||
CheckBox *tessellationHW = graphicsSettings->Add(new CheckBox(&g_Config.bHardwareTessellation, gr->T("Hardware Tessellation")));
|
||||
tessellationHW->OnClick.Add([=](EventParams &e) {
|
||||
settingInfo_->Show(gr->T("HardwareTessellation Tip", "Uses hardware to make curves, always uses a fixed quality"), e.v);
|
||||
settingInfo_->Show(gr->T("HardwareTessellation Tip", "Uses hardware to make curves"), e.v);
|
||||
return UI::EVENT_CONTINUE;
|
||||
});
|
||||
tessHWEnable_ = DoesBackendSupportHWTess() && !g_Config.bSoftwareRendering && g_Config.bHardwareTransform;
|
||||
|
@ -26,6 +26,7 @@
|
||||
#include "GPU/Common/GPUDebugInterface.h"
|
||||
#include "GPU/Common/SplineCommon.h"
|
||||
#include "GPU/GPUState.h"
|
||||
#include "Common/MemoryUtil.h"
|
||||
|
||||
static const char preview_fs[] =
|
||||
"#ifdef GL_ES\n"
|
||||
@ -164,96 +165,104 @@ u32 CGEDebugger::PrimPreviewOp() {
|
||||
}
|
||||
|
||||
static void ExpandBezier(int &count, int op, const std::vector<SimpleVertex> &simpleVerts, const std::vector<u16> &indices, std::vector<SimpleVertex> &generatedVerts, std::vector<u16> &generatedInds) {
|
||||
int count_u = (op & 0x00FF) >> 0;
|
||||
int count_v = (op & 0xFF00) >> 8;
|
||||
using namespace Spline;
|
||||
|
||||
int tess_u = gstate.getPatchDivisionU();
|
||||
int tess_v = gstate.getPatchDivisionV();
|
||||
if (tess_u < 1) {
|
||||
tess_u = 1;
|
||||
}
|
||||
if (tess_v < 1) {
|
||||
tess_v = 1;
|
||||
}
|
||||
int count_u = (op >> 0) & 0xFF;
|
||||
int count_v = (op >> 8) & 0xFF;
|
||||
// Real hardware seems to draw nothing when given < 4 either U or V.
|
||||
if (count_u < 4 || count_v < 4)
|
||||
return;
|
||||
|
||||
// Bezier patches share less control points than spline patches. Otherwise they are pretty much the same (except bezier don't support the open/close thing)
|
||||
int num_patches_u = (count_u - 1) / 3;
|
||||
int num_patches_v = (count_v - 1) / 3;
|
||||
int total_patches = num_patches_u * num_patches_v;
|
||||
std::vector<BezierPatch> patches;
|
||||
patches.resize(total_patches);
|
||||
for (int patch_u = 0; patch_u < num_patches_u; patch_u++) {
|
||||
for (int patch_v = 0; patch_v < num_patches_v; patch_v++) {
|
||||
BezierPatch &patch = patches[patch_u + patch_v * num_patches_u];
|
||||
for (int point = 0; point < 16; ++point) {
|
||||
int idx = (patch_u * 3 + point % 4) + (patch_v * 3 + point / 4) * count_u;
|
||||
patch.points[point] = &simpleVerts[0] + (!indices.empty() ? indices[idx] : idx);
|
||||
}
|
||||
patch.u_index = patch_u * 3;
|
||||
patch.v_index = patch_v * 3;
|
||||
patch.index = patch_v * num_patches_u + patch_u;
|
||||
patch.primType = gstate.getPatchPrimitiveType();
|
||||
patch.computeNormals = false;
|
||||
patch.patchFacing = false;
|
||||
}
|
||||
}
|
||||
BezierSurface surface;
|
||||
surface.num_points_u = count_u;
|
||||
surface.num_points_v = count_v;
|
||||
surface.tess_u = gstate.getPatchDivisionU();
|
||||
surface.tess_v = gstate.getPatchDivisionV();
|
||||
surface.num_patches_u = (count_u - 1) / 3;
|
||||
surface.num_patches_v = (count_v - 1) / 3;
|
||||
surface.primType = gstate.getPatchPrimitiveType();
|
||||
surface.patchFacing = false;
|
||||
|
||||
generatedVerts.resize((tess_u + 1) * (tess_v + 1) * total_patches);
|
||||
generatedInds.resize(tess_u * tess_v * 6 * total_patches);
|
||||
int num_points = count_u * count_v;
|
||||
// Make an array of pointers to the control points, to get rid of indices.
|
||||
std::vector<const SimpleVertex *> points(num_points);
|
||||
for (int idx = 0; idx < num_points; idx++)
|
||||
points[idx] = simpleVerts.data() + (!indices.empty() ? indices[idx] : idx);
|
||||
|
||||
count = 0;
|
||||
u8 *dest = (u8 *)&generatedVerts[0];
|
||||
u16 *inds = &generatedInds[0];
|
||||
for (int patch_idx = 0; patch_idx < total_patches; ++patch_idx) {
|
||||
const BezierPatch &patch = patches[patch_idx];
|
||||
TessellateBezierPatch(dest, inds, count, tess_u, tess_v, patch, gstate.vertType);
|
||||
}
|
||||
int total_patches = surface.num_patches_u * surface.num_patches_v;
|
||||
generatedVerts.resize((surface.tess_u + 1) * (surface.tess_v + 1) * total_patches);
|
||||
generatedInds.resize(surface.tess_u * surface.tess_v * 6 * total_patches);
|
||||
|
||||
OutputBuffers output;
|
||||
output.vertices = generatedVerts.data();
|
||||
output.indices = generatedInds.data();
|
||||
output.count = 0;
|
||||
|
||||
ControlPoints cpoints;
|
||||
cpoints.pos = (Vec3f *)AllocateAlignedMemory(sizeof(Vec3f) * num_points, 16);
|
||||
cpoints.tex = (Vec2f *)AllocateAlignedMemory(sizeof(Vec2f) * num_points, 16);
|
||||
cpoints.col = (Vec4f *)AllocateAlignedMemory(sizeof(Vec4f) * num_points, 16);
|
||||
cpoints.Convert(points.data(), num_points);
|
||||
|
||||
surface.Init(generatedVerts.size());
|
||||
SoftwareTessellation(output, surface, gstate.vertType, cpoints);
|
||||
count = output.count;
|
||||
|
||||
FreeAlignedMemory(cpoints.pos);
|
||||
FreeAlignedMemory(cpoints.tex);
|
||||
FreeAlignedMemory(cpoints.col);
|
||||
}
|
||||
|
||||
static void ExpandSpline(int &count, int op, const std::vector<SimpleVertex> &simpleVerts, const std::vector<u16> &indices, std::vector<SimpleVertex> &generatedVerts, std::vector<u16> &generatedInds) {
|
||||
SplinePatchLocal patch;
|
||||
patch.computeNormals = false;
|
||||
patch.primType = gstate.getPatchPrimitiveType();
|
||||
patch.patchFacing = false;
|
||||
|
||||
patch.count_u = (op & 0x00FF) >> 0;
|
||||
patch.count_v = (op & 0xFF00) >> 8;
|
||||
patch.type_u = (op >> 16) & 0x3;
|
||||
patch.type_v = (op >> 18) & 0x3;
|
||||
|
||||
patch.tess_u = gstate.getPatchDivisionU();
|
||||
patch.tess_v = gstate.getPatchDivisionV();
|
||||
if (patch.tess_u < 1) {
|
||||
patch.tess_u = 1;
|
||||
}
|
||||
if (patch.tess_v < 1) {
|
||||
patch.tess_v = 1;
|
||||
}
|
||||
using namespace Spline;
|
||||
|
||||
int count_u = (op >> 0) & 0xFF;
|
||||
int count_v = (op >> 8) & 0xFF;
|
||||
// Real hardware seems to draw nothing when given < 4 either U or V.
|
||||
if (patch.count_u < 4 || patch.count_v < 4) {
|
||||
if (count_u < 4 || count_v < 4)
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<const SimpleVertex *> points;
|
||||
points.resize(patch.count_u * patch.count_v);
|
||||
SplineSurface surface;
|
||||
surface.num_points_u = count_u;
|
||||
surface.num_points_v = count_v;
|
||||
surface.tess_u = gstate.getPatchDivisionU();
|
||||
surface.tess_v = gstate.getPatchDivisionV();
|
||||
surface.type_u = (op >> 16) & 0x3;
|
||||
surface.type_v = (op >> 18) & 0x3;
|
||||
surface.num_patches_u = count_u - 3;
|
||||
surface.num_patches_v = count_v - 3;
|
||||
surface.primType = gstate.getPatchPrimitiveType();
|
||||
surface.patchFacing = false;
|
||||
|
||||
int num_points = count_u * count_v;
|
||||
// Make an array of pointers to the control points, to get rid of indices.
|
||||
for (int idx = 0; idx < patch.count_u * patch.count_v; idx++) {
|
||||
points[idx] = &simpleVerts[0] + (!indices.empty() ? indices[idx] : idx);
|
||||
}
|
||||
patch.points = &points[0];
|
||||
std::vector<const SimpleVertex *> points(num_points);
|
||||
for (int idx = 0; idx < num_points; idx++)
|
||||
points[idx] = simpleVerts.data() + (!indices.empty() ? indices[idx] : idx);
|
||||
|
||||
int patch_div_s = (patch.count_u - 3) * patch.tess_u;
|
||||
int patch_div_t = (patch.count_v - 3) * patch.tess_v;
|
||||
int maxVertexCount = (patch_div_s + 1) * (patch_div_t + 1);
|
||||
|
||||
generatedVerts.resize(maxVertexCount);
|
||||
int patch_div_s = surface.num_patches_u * surface.tess_u;
|
||||
int patch_div_t = surface.num_patches_v * surface.tess_v;
|
||||
generatedVerts.resize((patch_div_s + 1) * (patch_div_t + 1));
|
||||
generatedInds.resize(patch_div_s * patch_div_t * 6);
|
||||
|
||||
count = 0;
|
||||
u8 *dest = (u8 *)&generatedVerts[0];
|
||||
TessellateSplinePatch(dest, &generatedInds[0], count, patch, gstate.vertType, maxVertexCount);
|
||||
OutputBuffers output;
|
||||
output.vertices = generatedVerts.data();
|
||||
output.indices = generatedInds.data();
|
||||
output.count = 0;
|
||||
|
||||
ControlPoints cpoints;
|
||||
cpoints.pos = (Vec3f *)AllocateAlignedMemory(sizeof(Vec3f) * num_points, 16);
|
||||
cpoints.tex = (Vec2f *)AllocateAlignedMemory(sizeof(Vec2f) * num_points, 16);
|
||||
cpoints.col = (Vec4f *)AllocateAlignedMemory(sizeof(Vec4f) * num_points, 16);
|
||||
cpoints.Convert(points.data(), num_points);
|
||||
|
||||
surface.Init(generatedVerts.size());
|
||||
SoftwareTessellation(output, surface, gstate.vertType, cpoints);
|
||||
count = output.count;
|
||||
|
||||
FreeAlignedMemory(cpoints.pos);
|
||||
FreeAlignedMemory(cpoints.tex);
|
||||
FreeAlignedMemory(cpoints.col);
|
||||
}
|
||||
|
||||
void CGEDebugger::UpdatePrimPreview(u32 op, int which) {
|
||||
|
@ -306,14 +306,14 @@ void GLQueueRunner::RunInitSteps(const std::vector<GLRInitStep> &steps, bool ski
|
||||
glBindTexture(tex->target, tex->texture);
|
||||
boundTexture = tex->texture;
|
||||
}
|
||||
if (!step.texture_image.data)
|
||||
if (!step.texture_image.data && step.texture_image.allocType != GLRAllocType::NONE)
|
||||
Crash();
|
||||
// For things to show in RenderDoc, need to split into glTexImage2D(..., nullptr) and glTexSubImage.
|
||||
glTexImage2D(tex->target, step.texture_image.level, step.texture_image.internalFormat, step.texture_image.width, step.texture_image.height, 0, step.texture_image.format, step.texture_image.type, step.texture_image.data);
|
||||
allocatedTextures = true;
|
||||
if (step.texture_image.allocType == GLRAllocType::ALIGNED) {
|
||||
FreeAlignedMemory(step.texture_image.data);
|
||||
} else {
|
||||
} else if (step.texture_image.allocType == GLRAllocType::NEW) {
|
||||
delete[] step.texture_image.data;
|
||||
}
|
||||
CHECK_GL_ERROR_IF_DEBUG();
|
||||
@ -490,7 +490,19 @@ void GLQueueRunner::RunSteps(const std::vector<GLRStep *> &steps, bool skipGLCal
|
||||
const GLRStep &step = *steps[i];
|
||||
switch (step.stepType) {
|
||||
case GLRStepType::RENDER:
|
||||
// TODO: With #11425 there'll be a case where we should really free spline data here.
|
||||
for (const auto &c : step.commands) {
|
||||
switch (c.cmd) {
|
||||
case GLRRenderCommand::TEXTURE_SUBIMAGE:
|
||||
if (c.texture_subimage.data) {
|
||||
if (c.texture_subimage.allocType == GLRAllocType::ALIGNED) {
|
||||
FreeAlignedMemory(c.texture_subimage.data);
|
||||
} else if (c.texture_subimage.allocType == GLRAllocType::NEW) {
|
||||
delete[] c.texture_subimage.data;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
delete steps[i];
|
||||
@ -1024,6 +1036,22 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step) {
|
||||
}
|
||||
break;
|
||||
}
|
||||
case GLRRenderCommand::TEXTURE_SUBIMAGE:
|
||||
{
|
||||
GLRTexture *tex = c.texture_subimage.texture;
|
||||
// TODO: Need bind?
|
||||
if (!c.texture_subimage.data)
|
||||
Crash();
|
||||
// For things to show in RenderDoc, need to split into glTexImage2D(..., nullptr) and glTexSubImage.
|
||||
glTexSubImage2D(tex->target, c.texture_subimage.level, c.texture_subimage.x, c.texture_subimage.y, c.texture_subimage.width, c.texture_subimage.height, c.texture_subimage.format, c.texture_subimage.type, c.texture_subimage.data);
|
||||
if (c.texture_subimage.allocType == GLRAllocType::ALIGNED) {
|
||||
FreeAlignedMemory(c.texture_subimage.data);
|
||||
} else if (c.texture_subimage.allocType == GLRAllocType::NEW) {
|
||||
delete[] c.texture_subimage.data;
|
||||
}
|
||||
CHECK_GL_ERROR_IF_DEBUG();
|
||||
break;
|
||||
}
|
||||
case GLRRenderCommand::RASTER:
|
||||
if (c.raster.cullEnable) {
|
||||
if (!cullEnabled) {
|
||||
|
@ -20,6 +20,7 @@ struct GLOffset2D {
|
||||
};
|
||||
|
||||
enum class GLRAllocType {
|
||||
NONE,
|
||||
NEW,
|
||||
ALIGNED,
|
||||
};
|
||||
@ -57,6 +58,7 @@ enum class GLRRenderCommand : uint8_t {
|
||||
DRAW,
|
||||
DRAW_INDEXED,
|
||||
PUSH_CONSTANTS,
|
||||
TEXTURE_SUBIMAGE,
|
||||
};
|
||||
|
||||
// TODO: Bloated since the biggest struct decides the size. Will need something more efficient (separate structs with shared
|
||||
@ -138,6 +140,18 @@ struct GLRRenderData {
|
||||
int slot;
|
||||
GLRTexture *texture;
|
||||
} texture;
|
||||
struct {
|
||||
GLRTexture *texture;
|
||||
GLenum format;
|
||||
GLenum type;
|
||||
int level;
|
||||
int x;
|
||||
int y;
|
||||
int width;
|
||||
int height;
|
||||
GLRAllocType allocType;
|
||||
uint8_t *data; // owned, delete[]-d
|
||||
} texture_subimage;
|
||||
struct {
|
||||
int slot;
|
||||
GLRFramebuffer *framebuffer;
|
||||
|
@ -530,6 +530,22 @@ public:
|
||||
initSteps_.push_back(step);
|
||||
}
|
||||
|
||||
void TextureSubImage(GLRTexture *texture, int level, int x, int y, int width, int height, GLenum format, GLenum type, uint8_t *data, GLRAllocType allocType = GLRAllocType::NEW) {
|
||||
_dbg_assert_(G3D, curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
|
||||
GLRRenderData _data{ GLRRenderCommand::TEXTURE_SUBIMAGE };
|
||||
_data.texture_subimage.texture = texture;
|
||||
_data.texture_subimage.data = data;
|
||||
_data.texture_subimage.format = format;
|
||||
_data.texture_subimage.type = type;
|
||||
_data.texture_subimage.level = level;
|
||||
_data.texture_subimage.x = x;
|
||||
_data.texture_subimage.y = y;
|
||||
_data.texture_subimage.width = width;
|
||||
_data.texture_subimage.height = height;
|
||||
_data.texture_subimage.allocType = allocType;
|
||||
curRenderStep_->commands.push_back(_data);
|
||||
}
|
||||
|
||||
void FinalizeTexture(GLRTexture *texture, int maxLevels, bool genMips) {
|
||||
GLRInitStep step{ GLRInitStepType::TEXTURE_FINALIZE };
|
||||
step.texture_finalize.texture = texture;
|
||||
|
Loading…
Reference in New Issue
Block a user