mirror of
https://github.com/hrydgard/ppsspp.git
synced 2024-11-23 05:19:56 +00:00
Merge pull request #16274 from unknownbrackets/gpu-boundingbox
Correct accuracy of bounding box test
This commit is contained in:
commit
a13ab3f76b
@ -16,8 +16,10 @@
|
||||
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
||||
|
||||
#include <algorithm>
|
||||
#include <cfloat>
|
||||
|
||||
#include "Common/Data/Convert/ColorConv.h"
|
||||
#include "Common/Math/lin/matrix4x4.h"
|
||||
#include "Common/Profiler/Profiler.h"
|
||||
#include "Common/LogReporting.h"
|
||||
#include "Core/Config.h"
|
||||
@ -244,38 +246,48 @@ void DrawEngineCommon::DispatchSubmitImm(GEPrimitiveType prim, TransformedVertex
|
||||
//
|
||||
// It does the simplest and safest test possible: If all points of a bbox are outside a single one of
|
||||
// our clipping planes, we reject the box. Tighter bounds would be desirable but would take more calculations.
|
||||
bool DrawEngineCommon::TestBoundingBox(const void* control_points, int vertexCount, u32 vertType, int *bytesRead) {
|
||||
bool DrawEngineCommon::TestBoundingBox(const void *control_points, const void *inds, int vertexCount, u32 vertType) {
|
||||
SimpleVertex *corners = (SimpleVertex *)(decoded + 65536 * 12);
|
||||
float *verts = (float *)(decoded + 65536 * 18);
|
||||
|
||||
// Although this may lead to drawing that shouldn't happen, the viewport is more complex on VR.
|
||||
// Let's always say objects are within bounds.
|
||||
if (gstate_c.Use(GPU_USE_VIRTUAL_REALITY))
|
||||
return true;
|
||||
|
||||
// Try to skip NormalizeVertices if it's pure positions. No need to bother with a vertex decoder
|
||||
// and a large vertex format.
|
||||
if ((vertType & 0xFFFFFF) == GE_VTYPE_POS_FLOAT) {
|
||||
if ((vertType & 0xFFFFFF) == GE_VTYPE_POS_FLOAT && !inds) {
|
||||
verts = (float *)control_points;
|
||||
*bytesRead = 3 * sizeof(float) * vertexCount;
|
||||
} else if ((vertType & 0xFFFFFF) == GE_VTYPE_POS_8BIT) {
|
||||
} else if ((vertType & 0xFFFFFF) == GE_VTYPE_POS_8BIT && !inds) {
|
||||
const s8 *vtx = (const s8 *)control_points;
|
||||
for (int i = 0; i < vertexCount * 3; i++) {
|
||||
verts[i] = vtx[i] * (1.0f / 128.0f);
|
||||
}
|
||||
*bytesRead = 3 * sizeof(s8) * vertexCount;
|
||||
} else if ((vertType & 0xFFFFFF) == GE_VTYPE_POS_16BIT) {
|
||||
} else if ((vertType & 0xFFFFFF) == GE_VTYPE_POS_16BIT && !inds) {
|
||||
const s16 *vtx = (const s16*)control_points;
|
||||
for (int i = 0; i < vertexCount * 3; i++) {
|
||||
verts[i] = vtx[i] * (1.0f / 32768.0f);
|
||||
}
|
||||
*bytesRead = 3 * sizeof(s16) * vertexCount;
|
||||
} else {
|
||||
// Simplify away bones and morph before proceeding
|
||||
// Simplify away indices, bones, and morph before proceeding.
|
||||
u8 *temp_buffer = decoded + 65536 * 24;
|
||||
int vertexSize = 0;
|
||||
NormalizeVertices((u8 *)corners, temp_buffer, (const u8 *)control_points, 0, vertexCount, vertType, &vertexSize);
|
||||
for (int i = 0; i < vertexCount; i++) {
|
||||
verts[i * 3] = corners[i].pos.x;
|
||||
verts[i * 3 + 1] = corners[i].pos.y;
|
||||
verts[i * 3 + 2] = corners[i].pos.z;
|
||||
|
||||
u16 indexLowerBound = 0;
|
||||
u16 indexUpperBound = (u16)vertexCount - 1;
|
||||
if (vertexCount > 0 && inds) {
|
||||
GetIndexBounds(inds, vertexCount, vertType, &indexLowerBound, &indexUpperBound);
|
||||
}
|
||||
|
||||
NormalizeVertices((u8 *)corners, temp_buffer, (const u8 *)control_points, indexLowerBound, indexUpperBound, vertType);
|
||||
|
||||
IndexConverter conv(vertType, inds);
|
||||
for (int i = 0; i < vertexCount; i++) {
|
||||
verts[i * 3] = corners[conv(i)].pos.x;
|
||||
verts[i * 3 + 1] = corners[conv(i)].pos.y;
|
||||
verts[i * 3 + 2] = corners[conv(i)].pos.z;
|
||||
}
|
||||
*bytesRead = vertexSize * vertexCount;
|
||||
}
|
||||
|
||||
Plane planes[6];
|
||||
@ -289,22 +301,63 @@ bool DrawEngineCommon::TestBoundingBox(const void* control_points, int vertexCou
|
||||
// TODO: Create a Matrix4x3ByMatrix4x3, and Matrix4x4ByMatrix4x3?
|
||||
Matrix4ByMatrix4(worldview, world, view);
|
||||
Matrix4ByMatrix4(worldviewproj, worldview, gstate.projMatrix);
|
||||
PlanesFromMatrix(worldviewproj, planes);
|
||||
for (int plane = 0; plane < 6; plane++) {
|
||||
|
||||
// Next, we need to apply viewport, scissor, region, and even offset - but only for X/Y.
|
||||
// Note that the PSP does not clip against the viewport.
|
||||
const Vec2f baseOffset = Vec2f(gstate.getOffsetX(), gstate.getOffsetY());
|
||||
// Region1 (rate) is used as an X1/Y1 here, matching PSP behavior.
|
||||
Vec2f minOffset = baseOffset + Vec2f(std::max(gstate.getRegionRateX() - 0x100, gstate.getScissorX1()), std::max(gstate.getRegionRateY() - 0x100, gstate.getScissorY1())) - Vec2f(1.0f, 1.0f);
|
||||
Vec2f maxOffset = baseOffset + Vec2f(std::min(gstate.getRegionX2(), gstate.getScissorX2()), std::min(gstate.getRegionY2(), gstate.getScissorY2())) + Vec2f(1.0f, 1.0f);
|
||||
|
||||
// Now let's apply the viewport to our scissor/region + offset range.
|
||||
Vec2f inverseViewportScale = Vec2f(1.0f / gstate.getViewportXScale(), 1.0f / gstate.getViewportYScale());
|
||||
Vec2f minViewport = (minOffset - Vec2f(gstate.getViewportXCenter(), gstate.getViewportYCenter())) * inverseViewportScale;
|
||||
Vec2f maxViewport = (maxOffset - Vec2f(gstate.getViewportXCenter(), gstate.getViewportYCenter())) * inverseViewportScale;
|
||||
|
||||
Lin::Matrix4x4 applyViewport;
|
||||
applyViewport.empty();
|
||||
// Scale to the viewport's size.
|
||||
applyViewport.xx = 2.0f / (maxViewport.x - minViewport.x);
|
||||
applyViewport.yy = 2.0f / (maxViewport.y - minViewport.y);
|
||||
applyViewport.zz = 1.0f;
|
||||
applyViewport.ww = 1.0f;
|
||||
// And offset to the viewport's centers.
|
||||
applyViewport.wx = -(maxViewport.x + minViewport.x) / (maxViewport.x - minViewport.x);
|
||||
applyViewport.wy = -(maxViewport.y + minViewport.y) / (maxViewport.y - minViewport.y);
|
||||
|
||||
float screenBounds[16];
|
||||
Matrix4ByMatrix4(screenBounds, worldviewproj, applyViewport.m);
|
||||
|
||||
PlanesFromMatrix(screenBounds, planes);
|
||||
// Note: near/far are not checked without clamp/clip enabled, so we skip those planes.
|
||||
int totalPlanes = gstate.isDepthClampEnabled() ? 6 : 4;
|
||||
for (int plane = 0; plane < totalPlanes; plane++) {
|
||||
int inside = 0;
|
||||
int out = 0;
|
||||
for (int i = 0; i < vertexCount; i++) {
|
||||
// Here we can test against the frustum planes!
|
||||
float value = planes[plane].Test(verts + i * 3);
|
||||
if (value < 0)
|
||||
if (value <= -FLT_EPSILON)
|
||||
out++;
|
||||
else
|
||||
inside++;
|
||||
}
|
||||
|
||||
if (inside == 0) {
|
||||
// All out
|
||||
return false;
|
||||
// All out - but check for X and Y if the offset was near the cullbox edge.
|
||||
bool outsideEdge = false;
|
||||
if (plane == 1)
|
||||
outsideEdge = minOffset.x < 1.0f;
|
||||
if (plane == 2)
|
||||
outsideEdge = minOffset.y < 1.0f;
|
||||
else if (plane == 0)
|
||||
outsideEdge = maxOffset.x >= 4096.0f;
|
||||
else if (plane == 3)
|
||||
outsideEdge = maxOffset.y >= 4096.0f;
|
||||
|
||||
// Only consider this outside if offset + scissor/region is fully inside the cullbox.
|
||||
if (!outsideEdge)
|
||||
return false;
|
||||
}
|
||||
|
||||
// Any out. For testing that the planes are in the right locations.
|
||||
|
@ -92,7 +92,7 @@ public:
|
||||
|
||||
virtual void DispatchSubmitImm(GEPrimitiveType prim, TransformedVertex *buffer, int vertexCount, int cullMode, bool continuation);
|
||||
|
||||
bool TestBoundingBox(const void* control_points, int vertexCount, u32 vertType, int *bytesRead);
|
||||
bool TestBoundingBox(const void *control_points, const void *inds, int vertexCount, u32 vertType);
|
||||
|
||||
void SubmitPrim(const void *verts, const void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int cullMode, int *bytesRead);
|
||||
template<class Surface>
|
||||
|
@ -460,7 +460,8 @@ public:
|
||||
|
||||
bool throughmode;
|
||||
bool skinInDecode;
|
||||
u8 size;
|
||||
// With morph and weights, this can be more than 256 bytes.
|
||||
u16 size;
|
||||
u8 onesize_;
|
||||
|
||||
u8 weightoff;
|
||||
|
@ -2153,29 +2153,50 @@ void GPUCommon::Execute_Spline(u32 op, u32 diff) {
|
||||
|
||||
void GPUCommon::Execute_BoundingBox(u32 op, u32 diff) {
|
||||
// Just resetting, nothing to check bounds for.
|
||||
const u32 count = op & 0xFFFFFF;
|
||||
const u32 count = op & 0xFFFF;
|
||||
if (count == 0) {
|
||||
currentList->bboxResult = false;
|
||||
return;
|
||||
}
|
||||
if (((count & 7) == 0) && count <= 64) { // Sanity check
|
||||
const void *control_points = Memory::GetPointer(gstate_c.vertexAddr);
|
||||
|
||||
// Approximate based on timings of several counts on a PSP.
|
||||
cyclesExecuted += count * 22;
|
||||
|
||||
const bool useInds = (gstate.vertType & GE_VTYPE_IDX_MASK) != 0;
|
||||
VertexDecoder *dec = drawEngineCommon_->GetVertexDecoder(gstate.vertType);
|
||||
int bytesRead = (useInds ? 1 : dec->VertexSize()) * count;
|
||||
|
||||
if (Memory::IsValidRange(gstate_c.vertexAddr, bytesRead)) {
|
||||
const void *control_points = Memory::GetPointerUnchecked(gstate_c.vertexAddr);
|
||||
if (!control_points) {
|
||||
ERROR_LOG_REPORT_ONCE(boundingbox, G3D, "Invalid verts in bounding box check");
|
||||
currentList->bboxResult = true;
|
||||
return;
|
||||
}
|
||||
|
||||
if (gstate.vertType & GE_VTYPE_IDX_MASK) {
|
||||
ERROR_LOG_REPORT_ONCE(boundingbox, G3D, "Indexed bounding box data not supported.");
|
||||
// Data seems invalid. Let's assume the box test passed.
|
||||
currentList->bboxResult = true;
|
||||
return;
|
||||
const void *inds = nullptr;
|
||||
if (useInds) {
|
||||
int indexShift = ((gstate.vertType & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT) - 1;
|
||||
inds = Memory::GetPointerUnchecked(gstate_c.indexAddr);
|
||||
if (!inds || !Memory::IsValidRange(gstate_c.indexAddr, count << indexShift)) {
|
||||
ERROR_LOG_REPORT_ONCE(boundingboxInds, G3D, "Invalid inds in bounding box check");
|
||||
currentList->bboxResult = true;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Test if the bounding box is within the drawing region.
|
||||
int bytesRead;
|
||||
currentList->bboxResult = drawEngineCommon_->TestBoundingBox(control_points, count, gstate.vertType, &bytesRead);
|
||||
// The PSP only seems to vary the result based on a single range of 0x100.
|
||||
if (count > 0x200) {
|
||||
// The second to last set of 0x100 is checked (even for odd counts.)
|
||||
size_t skipSize = (count - 0x200) * dec->VertexSize();
|
||||
currentList->bboxResult = drawEngineCommon_->TestBoundingBox((const uint8_t *)control_points + skipSize, inds, 0x100, gstate.vertType);
|
||||
} else if (count > 0x100) {
|
||||
int checkSize = count - 0x100;
|
||||
currentList->bboxResult = drawEngineCommon_->TestBoundingBox(control_points, inds, checkSize, gstate.vertType);
|
||||
} else {
|
||||
currentList->bboxResult = drawEngineCommon_->TestBoundingBox(control_points, inds, count, gstate.vertType);
|
||||
}
|
||||
AdvanceVerts(gstate.vertType, count, bytesRead);
|
||||
} else {
|
||||
ERROR_LOG_REPORT_ONCE(boundingbox, G3D, "Bad bounding box data: %06x", count);
|
||||
|
@ -293,7 +293,7 @@ int CtrlVertexList::GetRowCount() {
|
||||
DisplayList list;
|
||||
if (gpuDebug->GetCurrentDisplayList(list)) {
|
||||
u32 cmd = Memory::Read_U32(list.pc);
|
||||
if ((cmd >> 24) == GE_CMD_PRIM) {
|
||||
if ((cmd >> 24) == GE_CMD_PRIM || (cmd >> 24) == GE_CMD_BOUNDINGBOX) {
|
||||
rowCount_ = cmd & 0xFFFF;
|
||||
} else if ((cmd >> 24) == GE_CMD_BEZIER || (cmd >> 24) == GE_CMD_SPLINE) {
|
||||
u32 u = (cmd & 0x00FF) >> 0;
|
||||
|
@ -1 +1 @@
|
||||
Subproject commit 2a607edab7c3f50d9597c3a2b77082c3be871b55
|
||||
Subproject commit d40a8e05c3892bd67af71c6f80ea4396f8051541
|
Loading…
Reference in New Issue
Block a user