diff --git a/Core/HLE/sceAtrac.cpp b/Core/HLE/sceAtrac.cpp index 3de58f617a..f8c312e5a6 100644 --- a/Core/HLE/sceAtrac.cpp +++ b/Core/HLE/sceAtrac.cpp @@ -365,16 +365,23 @@ struct Atrac { } u32 currentFileOffset = FileOffsetBySample(currentSample_ - SamplesPerFrame() + FirstOffsetExtra()); - if ((bufferState_ & ATRAC_STATUS_STREAMED_MASK) == ATRAC_STATUS_STREAMED_MASK) { - if (currentFileOffset > first_.fileoffset) { - // We've looped in the data we added. - return PSP_ATRAC_LOOP_STREAM_DATA_IS_ON_MEMORY; - } - - if (first_.fileoffset >= first_.filesize && loopNum_ == 0) { - // We don't need anything more; we're not planning to loop again. + if (first_.fileoffset >= first_.filesize) { + if (bufferState_ == ATRAC_STATUS_STREAMED_WITHOUT_LOOP) { return PSP_ATRAC_NONLOOP_STREAM_DATA_IS_ON_MEMORY; } + int loopEndAdjusted = loopEndSample_ - FirstOffsetExtra() - firstSampleOffset_; + if (bufferState_ == ATRAC_STATUS_STREAMED_LOOP_WITH_TRAILER && currentSample_ > loopEndAdjusted) { + // No longer looping in this case, outside the loop. + return PSP_ATRAC_NONLOOP_STREAM_DATA_IS_ON_MEMORY; + } + if ((bufferState_ & ATRAC_STATUS_STREAMED_MASK) == ATRAC_STATUS_STREAMED_MASK && loopNum_ == 0) { + return PSP_ATRAC_LOOP_STREAM_DATA_IS_ON_MEMORY; + } + } + + if ((bufferState_ & ATRAC_STATUS_STREAMED_MASK) == ATRAC_STATUS_STREAMED_MASK) { + // Since we're streaming, the remaining frames are what's valid in the buffer. + return bufferValidBytes_ / bytesPerFrame_; } // Since the first frame is shorter by this offset, add to round up at this offset. @@ -563,13 +570,27 @@ struct Atrac { void CalculateStreamInfo(u32 *readOffset); - u32 StreamBufferEnd() { + u32 StreamBufferEnd() const { // The buffer is always aligned to a frame in size, not counting an optional header. // The header will only initially exist after the data is first set. 
u32 framesAfterHeader = (bufferMaxSize_ - bufferHeaderSize_) / bytesPerFrame_; return framesAfterHeader * bytesPerFrame_ + bufferHeaderSize_; } + void ConsumeFrame() { + bufferPos_ += bytesPerFrame_; + if (bufferValidBytes_ > bytesPerFrame_) { + bufferValidBytes_ -= bytesPerFrame_; + } else { + bufferValidBytes_ = 0; + } + if (bufferPos_ >= StreamBufferEnd()) { + // Wrap around... theoretically, this should only happen at exactly StreamBufferEnd. + bufferPos_ -= StreamBufferEnd(); + bufferHeaderSize_ = 0; + } + } + private: void AnalyzeReset(); }; @@ -717,7 +738,7 @@ int Atrac::Analyze(u32 addr, u32 size) { // TODO: Validate stuff. if (Memory::Read_U32(first_.addr) != RIFF_CHUNK_MAGIC) { - return hleReportError(ME, ATRAC_ERROR_UNKNOWN_FORMAT, "invalid RIF header"); + return hleReportError(ME, ATRAC_ERROR_UNKNOWN_FORMAT, "invalid RIFF header"); } u32 offset = 8; @@ -1028,20 +1049,28 @@ void Atrac::CalculateStreamInfo(u32 *outReadOffset) { first_.writableBytes = bufferPos_ - bufferStartUsed; } + if (readOffset >= first_.filesize) { + if (bufferState_ == ATRAC_STATUS_STREAMED_WITHOUT_LOOP) { + // We don't need anything more, so all 0s. + readOffset = 0; + first_.offset = 0; + first_.writableBytes = 0; + } else { + readOffset = FileOffsetBySample(loopStartSample_ - FirstOffsetExtra() - firstSampleOffset_ - SamplesPerFrame() * 2); + } + } + + if (readOffset + first_.writableBytes > first_.filesize) { + // Never ask for past the end of file, even when the space is free. + first_.writableBytes = first_.filesize - readOffset; + } + // If you don't think this should be here, remove it. It's just a temporary safety check. 
if (first_.offset + first_.writableBytes > bufferMaxSize_) { ERROR_LOG_REPORT(ME, "Somehow calculated too many writable bytes: %d + %d > %d", first_.offset, first_.writableBytes, bufferMaxSize_); first_.offset = 0; first_.writableBytes = bufferMaxSize_; } - - if (readOffset >= first_.filesize) { - if (bufferState_ == ATRAC_STATUS_STREAMED_WITHOUT_LOOP) { - readOffset = 0; - } else { - readOffset = dataOff_; - } - } } if (outReadOffset) { @@ -1133,6 +1162,12 @@ u32 _AtracDecodeData(int atracID, u8 *outbuf, u32 outbufPtr, u32 *SamplesNum, u3 skipSamples = unalignedSamples; } + if (skipSamples != 0 && atrac->bufferHeaderSize_ == 0) { + // Skip the initial frame used to load state for the looped frame. + // TODO: We will want to actually read this in. + atrac->ConsumeFrame(); + } + if (!atrac->failedDecode_ && (atrac->codecType_ == PSP_MODE_AT_3 || atrac->codecType_ == PSP_MODE_AT_3_PLUS)) { atrac->SeekToSample(atrac->currentSample_); @@ -1209,17 +1244,7 @@ u32 _AtracDecodeData(int atracID, u8 *outbuf, u32 outbufPtr, u32 *SamplesNum, u3 atrac->currentSample_ += numSamples; atrac->decodePos_ = atrac->DecodePosBySample(atrac->currentSample_); - atrac->bufferPos_ += atrac->bytesPerFrame_; - if (atrac->bufferValidBytes_ > atrac->bytesPerFrame_) { - atrac->bufferValidBytes_ -= atrac->bytesPerFrame_; - } else { - atrac->bufferValidBytes_ = 0; - } - if (atrac->bufferPos_ >= atrac->StreamBufferEnd()) { - // Wrap around... theoretically, this should only happen at exactly StreamBufferEnd. - atrac->bufferPos_ -= atrac->StreamBufferEnd(); - atrac->bufferHeaderSize_ = 0; - } + atrac->ConsumeFrame(); int finishFlag = 0; // TODO: Verify. @@ -1231,6 +1256,11 @@ u32 _AtracDecodeData(int atracID, u8 *outbuf, u32 outbufPtr, u32 *SamplesNum, u3 if (atrac->loopNum_ > 0) atrac->loopNum_--; } + if ((atrac->bufferState_ & ATRAC_STATUS_STREAMED_MASK) == ATRAC_STATUS_STREAMED_MASK) { + // Whatever bytes we have left were added from the loop. 
+ atrac->first_.fileoffset = atrac->FileOffsetBySample(atrac->loopStartSample_ - atrac->FirstOffsetExtra() - atrac->firstSampleOffset_ - atrac->SamplesPerFrame() * 2); + // Skip the initial frame at the start. + } } else if (hitEnd) { finishFlag = 1; @@ -1692,7 +1722,7 @@ static u32 sceAtracResetPlayPosition(int atracID, int sample, int bytesWrittenFi atrac->bufferHeaderSize_ = 0; atrac->bufferPos_ = atrac->bytesPerFrame_; - atrac->bufferValidBytes_ = bytesWrittenFirstBuf; + atrac->bufferValidBytes_ = bytesWrittenFirstBuf - atrac->bufferPos_; } if (atrac->codecType_ == PSP_MODE_AT_3 || atrac->codecType_ == PSP_MODE_AT_3_PLUS) { diff --git a/GPU/Common/FramebufferCommon.cpp b/GPU/Common/FramebufferCommon.cpp index e6c59a9f2e..e1122ab11d 100644 --- a/GPU/Common/FramebufferCommon.cpp +++ b/GPU/Common/FramebufferCommon.cpp @@ -497,7 +497,14 @@ void FramebufferManagerCommon::NotifyVideoUpload(u32 addr, int size, int width, vfb->last_frame_render = gpuStats.numFlips; } - // TODO: Check width? + if (vfb->fb_stride < width) { + DEBUG_LOG(ME, "Changing stride for %08x from %d to %d", addr, vfb->fb_stride, width); + const int bpp = fmt == GE_FORMAT_8888 ? 4 : 2; + ResizeFramebufFBO(vfb, width, size / (bpp * width)); + vfb->fb_stride = width; + // This might be a bit wider than necessary, but we'll redetect on next render. 
+ vfb->width = width; + } } } diff --git a/GPU/Common/GPUStateUtils.cpp b/GPU/Common/GPUStateUtils.cpp index 91eb9259d4..7c9a3675f4 100644 --- a/GPU/Common/GPUStateUtils.cpp +++ b/GPU/Common/GPUStateUtils.cpp @@ -552,8 +552,8 @@ void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, flo out.viewportY = renderY + displayOffsetY; out.viewportW = curRTWidth * renderWidthFactor; out.viewportH = curRTHeight * renderHeightFactor; - out.depthRangeMin = 0.0f; - out.depthRangeMax = 1.0f; + out.depthRangeMin = ToScaledDepth(0); + out.depthRangeMax = ToScaledDepth(65535); } else { // These we can turn into a glViewport call, offset by offsetX and offsetY. Math after. float vpXScale = gstate.getViewportXScale(); @@ -634,32 +634,27 @@ void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, flo float vpZScale = gstate.getViewportZScale(); float vpZCenter = gstate.getViewportZCenter(); - float depthRangeMin = vpZCenter - vpZScale; - float depthRangeMax = vpZCenter + vpZScale; - // Near/far can be inverted. Let's reverse while dealing with clamping, though. - bool inverted = vpZScale < 0.0f; - float near = (inverted ? depthRangeMax : depthRangeMin) * (1.0f / 65535.0f); - float far = (inverted ? depthRangeMin : depthRangeMax) * (1.0f / 65535.0f); + // Near/far can be inverted. We deal with that in the projection/scale. 
+ float near = vpZCenter - fabsf(vpZScale); + float far = vpZCenter + fabsf(vpZScale); - if (near < 0.0f || far > 1.0f) { + if (near < 0.0f || far > 65535.0f) { float overageNear = std::max(-near, 0.0f); - float overageFar = std::max(far - 1.0f, 0.0f); + float overageFar = std::max(far - 65535.0f, 0.0f); float drift = overageFar - overageNear; near += overageNear; far -= overageFar; - zScale = fabsf(vpZScale * (2.0f / 65535.0f)) / (far - near); + zScale = (vpZScale * 2.0f) / (far - near); zOffset = drift / (far - near); - } - - if (inverted) { + } else if (vpZScale < 0.0f) { + // This flips to match our near/far. zScale = -zScale; - inverted = false; } - out.depthRangeMin = inverted ? far : near; - out.depthRangeMax = inverted ? near : far; + out.depthRangeMin = near * (1.0f / 65535.0f); + out.depthRangeMax = far * (1.0f / 65535.0f); bool scaleChanged = gstate_c.vpWidthScale != wScale || gstate_c.vpHeightScale != hScale; bool offsetChanged = gstate_c.vpXOffset != xOffset || gstate_c.vpYOffset != yOffset; @@ -678,18 +673,21 @@ void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, flo #ifndef MOBILE_DEVICE float minz = gstate.getDepthRangeMin(); float maxz = gstate.getDepthRangeMax(); - if ((minz > depthRangeMin && minz > depthRangeMax) || (maxz < depthRangeMin && maxz < depthRangeMax)) { - WARN_LOG_REPORT_ONCE(minmaxz, G3D, "Unsupported depth range in test - depth range: %f-%f, test: %f-%f", depthRangeMin, depthRangeMax, minz, maxz); - } else if ((gstate.clipEnable & 1) == 0) { - // TODO: Need to test whether clipEnable should even affect depth or not. 
- if ((minz < depthRangeMin && minz < depthRangeMax) || (maxz > depthRangeMin && maxz > depthRangeMax)) { - WARN_LOG_REPORT_ONCE(znoclip, G3D, "Unsupported depth range in test without clipping - depth range: %f-%f, test: %f-%f", depthRangeMin, depthRangeMax, minz, maxz); + if (minz > near || maxz < far) { + if ((gstate.clipEnable & 1) == 0) { + WARN_LOG_REPORT_ONCE(minmaxznoclip, G3D, "Unsupported depth range test without clipping - clip: %f-%f, test: %f-%f", near, far, minz, maxz); + } else { + WARN_LOG_REPORT_ONCE(minmaxz, G3D, "Unsupported depth range test - clip: %f-%f, test: %f-%f", near, far, minz, maxz); } } #endif } } +float ToScaledDepth(u16 z) { + return z * (1.0f / 65535.0f); +} + static const BlendFactor genericALookup[11] = { BlendFactor::DST_COLOR, BlendFactor::ONE_MINUS_DST_COLOR, diff --git a/GPU/Common/GPUStateUtils.h b/GPU/Common/GPUStateUtils.h index 59e3a87d31..dffb2e4bc3 100644 --- a/GPU/Common/GPUStateUtils.h +++ b/GPU/Common/GPUStateUtils.h @@ -66,6 +66,7 @@ struct ViewportAndScissor { bool dirtyDepth; }; void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, float renderHeight, int bufferWidth, int bufferHeight, ViewportAndScissor &out); +float ToScaledDepth(u16 z); // These are common to all modern APIs and can be easily converted with a lookup table. 
enum class BlendFactor : uint8_t { diff --git a/GPU/Common/SoftwareTransformCommon.cpp b/GPU/Common/SoftwareTransformCommon.cpp index f35d86e8a7..1a541e1681 100644 --- a/GPU/Common/SoftwareTransformCommon.cpp +++ b/GPU/Common/SoftwareTransformCommon.cpp @@ -21,11 +21,12 @@ #include "Core/Config.h" #include "GPU/GPUState.h" #include "GPU/Math3D.h" -#include "GPU/Common/VertexDecoderCommon.h" -#include "GPU/Common/TransformCommon.h" #include "GPU/Common/FramebufferCommon.h" -#include "GPU/Common/TextureCacheCommon.h" +#include "GPU/Common/GPUStateUtils.h" #include "GPU/Common/SoftwareTransformCommon.h" +#include "GPU/Common/TransformCommon.h" +#include "GPU/Common/TextureCacheCommon.h" +#include "GPU/Common/VertexDecoderCommon.h" // This is the software transform pipeline, which is necessary for supporting RECT // primitives correctly without geometry shaders, and may be easier to use for @@ -406,7 +407,8 @@ void SoftwareTransform( // TODO: This bleeds outside the play area in non-buffered mode. Big deal? Probably not. if (maxIndex > 1 && gstate.isModeClear() && prim == GE_PRIM_RECTANGLES && IsReallyAClear(transformed, maxIndex) && gl_extensions.gpuVendor != GPU_VENDOR_POWERVR) { // && g_Config.iRenderingMode != FB_NON_BUFFERED_MODE) { result->color = transformed[0].color0_32; - result->depth = transformed[0].z; + // Need to rescale from a [0, 1] float. This is the final transformed value. 
+ result->depth = ToScaledDepth((s16)(int)(transformed[0].z * 65535.0f)); result->action = SW_CLEAR; return; } diff --git a/GPU/Directx9/FramebufferDX9.cpp b/GPU/Directx9/FramebufferDX9.cpp index 63f61d3eef..9318ec0784 100644 --- a/GPU/Directx9/FramebufferDX9.cpp +++ b/GPU/Directx9/FramebufferDX9.cpp @@ -58,7 +58,7 @@ namespace DX9 { dxstate.stencilFunc.set(D3DCMP_ALWAYS, 0, 0); dxstate.stencilMask.set(0xFF); } - pD3Ddevice->Clear(0, NULL, D3DCLEAR_STENCIL|D3DCLEAR_TARGET |D3DCLEAR_ZBUFFER, D3DCOLOR_ARGB(0, 0, 0, 0), 0, 0); + pD3Ddevice->Clear(0, NULL, D3DCLEAR_STENCIL|D3DCLEAR_TARGET |D3DCLEAR_ZBUFFER, D3DCOLOR_ARGB(0, 0, 0, 0), ToScaledDepth(0), 0); if (keepState) { dxstate.scissorTest.restore(); dxstate.depthWrite.restore(); diff --git a/GPU/GLES/Framebuffer.cpp b/GPU/GLES/Framebuffer.cpp index 3fbc371f57..8ba44fcd36 100644 --- a/GPU/GLES/Framebuffer.cpp +++ b/GPU/GLES/Framebuffer.cpp @@ -89,10 +89,11 @@ void FramebufferManager::ClearBuffer(bool keepState) { } glClearColor(0.0f, 0.0f, 0.0f, 0.0f); glClearStencil(0); + float clearDepth = ToScaledDepth(0); #ifdef USING_GLES2 - glClearDepthf(0.0f); + glClearDepthf(clearDepth); #else - glClearDepth(0.0); + glClearDepth(clearDepth); #endif glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); if (keepState) { diff --git a/GPU/GLES/ShaderManager.cpp b/GPU/GLES/ShaderManager.cpp index 322a91ffde..7676d53ed8 100644 --- a/GPU/GLES/ShaderManager.cpp +++ b/GPU/GLES/ShaderManager.cpp @@ -604,8 +604,8 @@ void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid) { // The projection already accounts for those, so we need to reverse them. // // Additionally, OpenGL uses a range from [-1, 1]. So we multiply by scale and add the center. - viewZScale *= (1.0f / gstate_c.vpDepthScale); - viewZCenter -= 65535.0f * (gstate_c.vpZOffset); + viewZScale *= 1.0f / gstate_c.vpDepthScale; + viewZCenter -= 65535.0f * gstate_c.vpZOffset; if (viewZScale != 0.0) { viewZInvScale = 1.0f / viewZScale;