diff --git a/Core/HLE/sceAtrac.cpp b/Core/HLE/sceAtrac.cpp
index 3de58f617a..f8c312e5a6 100644
--- a/Core/HLE/sceAtrac.cpp
+++ b/Core/HLE/sceAtrac.cpp
@@ -365,16 +365,23 @@ struct Atrac {
 		}
 
 		u32 currentFileOffset = FileOffsetBySample(currentSample_ - SamplesPerFrame() + FirstOffsetExtra());
-		if ((bufferState_ & ATRAC_STATUS_STREAMED_MASK) == ATRAC_STATUS_STREAMED_MASK) {
-			if (currentFileOffset > first_.fileoffset) {
-				// We've looped in the data we added.
-				return PSP_ATRAC_LOOP_STREAM_DATA_IS_ON_MEMORY;
-			}
-
-			if (first_.fileoffset >= first_.filesize && loopNum_ == 0) {
-				// We don't need anything more; we're not planning to loop again.
+		if (first_.fileoffset >= first_.filesize) {
+			if (bufferState_ == ATRAC_STATUS_STREAMED_WITHOUT_LOOP) {
 				return PSP_ATRAC_NONLOOP_STREAM_DATA_IS_ON_MEMORY;
 			}
+			int loopEndAdjusted = loopEndSample_ - FirstOffsetExtra() - firstSampleOffset_;
+			if (bufferState_ == ATRAC_STATUS_STREAMED_LOOP_WITH_TRAILER && currentSample_ > loopEndAdjusted) {
+				// No longer looping in this case, outside the loop.
+				return PSP_ATRAC_NONLOOP_STREAM_DATA_IS_ON_MEMORY;
+			}
+			if ((bufferState_ & ATRAC_STATUS_STREAMED_MASK) == ATRAC_STATUS_STREAMED_MASK && loopNum_ == 0) {
+				return PSP_ATRAC_LOOP_STREAM_DATA_IS_ON_MEMORY;
+			}
+		}
+
+		if ((bufferState_ & ATRAC_STATUS_STREAMED_MASK) == ATRAC_STATUS_STREAMED_MASK) {
+			// Since we're streaming, the remaining frames are what's valid in the buffer.
+			return bufferValidBytes_ / bytesPerFrame_;
 		}
 
 		// Since the first frame is shorter by this offset, add to round up at this offset.
@@ -563,13 +570,27 @@ struct Atrac {
 
 	void CalculateStreamInfo(u32 *readOffset);
 
-	u32 StreamBufferEnd() {
+	u32 StreamBufferEnd() const {
 		// The buffer is always aligned to a frame in size, not counting an optional header.
 		// The header will only initially exist after the data is first set.
 		u32 framesAfterHeader = (bufferMaxSize_ - bufferHeaderSize_) / bytesPerFrame_;
 		return framesAfterHeader * bytesPerFrame_ + bufferHeaderSize_;
 	}
 
+	void ConsumeFrame() {  // Advances the stream buffer position past one frame (bytesPerFrame_ bytes).
+		bufferPos_ += bytesPerFrame_;
+		if (bufferValidBytes_ > bytesPerFrame_) {
+			bufferValidBytes_ -= bytesPerFrame_;
+		} else {
+			bufferValidBytes_ = 0;  // At most one frame was left, so clamp to zero.
+		}
+		if (bufferPos_ >= StreamBufferEnd()) {
+			// Wrap around... theoretically, this should only happen at exactly StreamBufferEnd.
+			bufferPos_ -= StreamBufferEnd();
+			bufferHeaderSize_ = 0;  // Cleared after the subtraction: StreamBufferEnd() reads bufferHeaderSize_, so order matters.
+		}
+	}
+
 private:
 	void AnalyzeReset();
 };
@@ -717,7 +738,7 @@ int Atrac::Analyze(u32 addr, u32 size) {
 	// TODO: Validate stuff.
 
 	if (Memory::Read_U32(first_.addr) != RIFF_CHUNK_MAGIC) {
-		return hleReportError(ME, ATRAC_ERROR_UNKNOWN_FORMAT, "invalid RIF header");
+		return hleReportError(ME, ATRAC_ERROR_UNKNOWN_FORMAT, "invalid RIFF header");
 	}
 
 	u32 offset = 8;
@@ -1028,20 +1049,28 @@ void Atrac::CalculateStreamInfo(u32 *outReadOffset) {
 			first_.writableBytes = bufferPos_ - bufferStartUsed;
 		}
 
+		if (readOffset >= first_.filesize) {
+			if (bufferState_ == ATRAC_STATUS_STREAMED_WITHOUT_LOOP) {
+				// We don't need anything more, so all 0s.
+				readOffset = 0;
+				first_.offset = 0;
+				first_.writableBytes = 0;
+			} else {
+				readOffset = FileOffsetBySample(loopStartSample_ - FirstOffsetExtra() - firstSampleOffset_ - SamplesPerFrame() * 2);
+			}
+		}
+
+		if (readOffset + first_.writableBytes > first_.filesize) {
+			// Never ask for past the end of file, even when the space is free.
+			first_.writableBytes = first_.filesize - readOffset;
+		}
+
 		// If you don't think this should be here, remove it.  It's just a temporary safety check.
 		if (first_.offset + first_.writableBytes > bufferMaxSize_) {
 			ERROR_LOG_REPORT(ME, "Somehow calculated too many writable bytes: %d + %d > %d", first_.offset, first_.writableBytes, bufferMaxSize_);
 			first_.offset = 0;
 			first_.writableBytes = bufferMaxSize_;
 		}
-
-		if (readOffset >= first_.filesize) {
-			if (bufferState_ == ATRAC_STATUS_STREAMED_WITHOUT_LOOP) {
-				readOffset = 0;
-			} else {
-				readOffset = dataOff_;
-			}
-		}
 	}
 
 	if (outReadOffset) {
@@ -1133,6 +1162,12 @@ u32 _AtracDecodeData(int atracID, u8 *outbuf, u32 outbufPtr, u32 *SamplesNum, u3
 				skipSamples = unalignedSamples;
 			}
 
+			if (skipSamples != 0 && atrac->bufferHeaderSize_ == 0) {
+				// Skip the initial frame used to load state for the looped frame.
+				// TODO: We will want to actually read this in.
+				atrac->ConsumeFrame();
+			}
+
 			if (!atrac->failedDecode_ && (atrac->codecType_ == PSP_MODE_AT_3 || atrac->codecType_ == PSP_MODE_AT_3_PLUS)) {
 				atrac->SeekToSample(atrac->currentSample_);
 
@@ -1209,17 +1244,7 @@ u32 _AtracDecodeData(int atracID, u8 *outbuf, u32 outbufPtr, u32 *SamplesNum, u3
 			atrac->currentSample_ += numSamples;
 			atrac->decodePos_ = atrac->DecodePosBySample(atrac->currentSample_);
 
-			atrac->bufferPos_ += atrac->bytesPerFrame_;
-			if (atrac->bufferValidBytes_ > atrac->bytesPerFrame_) {
-				atrac->bufferValidBytes_ -= atrac->bytesPerFrame_;
-			} else {
-				atrac->bufferValidBytes_ = 0;
-			}
-			if (atrac->bufferPos_ >= atrac->StreamBufferEnd()) {
-				// Wrap around... theoretically, this should only happen at exactly StreamBufferEnd.
-				atrac->bufferPos_ -= atrac->StreamBufferEnd();
-				atrac->bufferHeaderSize_ = 0;
-			}
+			atrac->ConsumeFrame();
 
 			int finishFlag = 0;
 			// TODO: Verify.
@@ -1231,6 +1256,11 @@ u32 _AtracDecodeData(int atracID, u8 *outbuf, u32 outbufPtr, u32 *SamplesNum, u3
 					if (atrac->loopNum_ > 0)
 						atrac->loopNum_--;
 				}
+				if ((atrac->bufferState_ & ATRAC_STATUS_STREAMED_MASK) == ATRAC_STATUS_STREAMED_MASK) {
+					// Whatever bytes we have left were added from the loop.
+					atrac->first_.fileoffset = atrac->FileOffsetBySample(atrac->loopStartSample_ - atrac->FirstOffsetExtra() - atrac->firstSampleOffset_ - atrac->SamplesPerFrame() * 2);
+					// Skip the initial frame at the start.
+				}
 			} else if (hitEnd) {
 				finishFlag = 1;
 
@@ -1692,7 +1722,7 @@ static u32 sceAtracResetPlayPosition(int atracID, int sample, int bytesWrittenFi
 
 			atrac->bufferHeaderSize_ = 0;
 			atrac->bufferPos_ = atrac->bytesPerFrame_;
-			atrac->bufferValidBytes_ = bytesWrittenFirstBuf;
+			atrac->bufferValidBytes_ = bytesWrittenFirstBuf - atrac->bufferPos_;
 		}
 
 		if (atrac->codecType_ == PSP_MODE_AT_3 || atrac->codecType_ == PSP_MODE_AT_3_PLUS) {
diff --git a/GPU/Common/FramebufferCommon.cpp b/GPU/Common/FramebufferCommon.cpp
index e6c59a9f2e..e1122ab11d 100644
--- a/GPU/Common/FramebufferCommon.cpp
+++ b/GPU/Common/FramebufferCommon.cpp
@@ -497,7 +497,14 @@ void FramebufferManagerCommon::NotifyVideoUpload(u32 addr, int size, int width,
 			vfb->last_frame_render = gpuStats.numFlips;
 		}
 
-		// TODO: Check width?
+		if (vfb->fb_stride < width) {  // Only ever grows the framebuffer; a narrower upload leaves it alone.
+			DEBUG_LOG(ME, "Changing stride for %08x from %d to %d", addr, vfb->fb_stride, width);
+			const int bpp = fmt == GE_FORMAT_8888 ? 4 : 2;  // 4 for 8888, 2 for every other format.
+			ResizeFramebufFBO(vfb, width, size / (bpp * width));  // Height argument = size / (bpp * width); presumably size is in bytes.
+			vfb->fb_stride = width;
+			// This might be a bit wider than necessary, but we'll redetect on next render.
+			vfb->width = width;
+		}
 	}
 }
 
diff --git a/GPU/Common/GPUStateUtils.cpp b/GPU/Common/GPUStateUtils.cpp
index 91eb9259d4..7c9a3675f4 100644
--- a/GPU/Common/GPUStateUtils.cpp
+++ b/GPU/Common/GPUStateUtils.cpp
@@ -552,8 +552,8 @@ void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, flo
 		out.viewportY = renderY + displayOffsetY;
 		out.viewportW = curRTWidth * renderWidthFactor;
 		out.viewportH = curRTHeight * renderHeightFactor;
-		out.depthRangeMin = 0.0f;
-		out.depthRangeMax = 1.0f;
+		out.depthRangeMin = ToScaledDepth(0);
+		out.depthRangeMax = ToScaledDepth(65535);
 	} else {
 		// These we can turn into a glViewport call, offset by offsetX and offsetY. Math after.
 		float vpXScale = gstate.getViewportXScale();
@@ -634,32 +634,27 @@ void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, flo
 
 		float vpZScale = gstate.getViewportZScale();
 		float vpZCenter = gstate.getViewportZCenter();
-		float depthRangeMin = vpZCenter - vpZScale;
-		float depthRangeMax = vpZCenter + vpZScale;
-		// Near/far can be inverted.  Let's reverse while dealing with clamping, though.
-		bool inverted = vpZScale < 0.0f;
-		float near = (inverted ? depthRangeMax : depthRangeMin) * (1.0f / 65535.0f);
-		float far = (inverted ? depthRangeMin : depthRangeMax) * (1.0f / 65535.0f);
+		// Near/far can be inverted.  We deal with that in the projection/scale.
+		float near = vpZCenter - fabsf(vpZScale);
+		float far = vpZCenter + fabsf(vpZScale);
 
-		if (near < 0.0f || far > 1.0f) {
+		if (near < 0.0f || far > 65535.0f) {
 			float overageNear = std::max(-near, 0.0f);
-			float overageFar = std::max(far - 1.0f, 0.0f);
+			float overageFar = std::max(far - 65535.0f, 0.0f);
 			float drift = overageFar - overageNear;
 
 			near += overageNear;
 			far -= overageFar;
 
-			zScale = fabsf(vpZScale * (2.0f / 65535.0f)) / (far - near);
+			zScale = (vpZScale * 2.0f) / (far - near);
 			zOffset = drift / (far - near);
-		}
-
-		if (inverted) {
+		} else if (vpZScale < 0.0f) {
+			// This flips to match our near/far.
 			zScale = -zScale;
-			inverted = false;
 		}
 
-		out.depthRangeMin = inverted ? far : near;
-		out.depthRangeMax = inverted ? near : far;
+		out.depthRangeMin = near * (1.0f / 65535.0f);
+		out.depthRangeMax = far * (1.0f / 65535.0f);
 
 		bool scaleChanged = gstate_c.vpWidthScale != wScale || gstate_c.vpHeightScale != hScale;
 		bool offsetChanged = gstate_c.vpXOffset != xOffset || gstate_c.vpYOffset != yOffset;
@@ -678,18 +673,21 @@ void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, flo
 #ifndef MOBILE_DEVICE
 		float minz = gstate.getDepthRangeMin();
 		float maxz = gstate.getDepthRangeMax();
-		if ((minz > depthRangeMin && minz > depthRangeMax) || (maxz < depthRangeMin && maxz < depthRangeMax)) {
-			WARN_LOG_REPORT_ONCE(minmaxz, G3D, "Unsupported depth range in test - depth range: %f-%f, test: %f-%f", depthRangeMin, depthRangeMax, minz, maxz);
-		} else if ((gstate.clipEnable & 1) == 0) {
-			// TODO: Need to test whether clipEnable should even affect depth or not.
-			if ((minz < depthRangeMin && minz < depthRangeMax) || (maxz > depthRangeMin && maxz > depthRangeMax)) {
-				WARN_LOG_REPORT_ONCE(znoclip, G3D, "Unsupported depth range in test without clipping - depth range: %f-%f, test: %f-%f", depthRangeMin, depthRangeMax, minz, maxz);
+		if (minz > near || maxz < far) {
+			if ((gstate.clipEnable & 1) == 0) {
+				WARN_LOG_REPORT_ONCE(minmaxznoclip, G3D, "Unsupported depth range test without clipping - clip: %f-%f, test: %f-%f", near, far, minz, maxz);
+			} else {
+				WARN_LOG_REPORT_ONCE(minmaxz, G3D, "Unsupported depth range test - clip: %f-%f, test: %f-%f", near, far, minz, maxz);
 			}
 		}
 #endif
 	}
 }
 
+float ToScaledDepth(u16 z) {  // Scales a 16-bit depth value by 1/65535, so 0 maps to 0.0f.
+	return z * (1.0f / 65535.0f);
+}
+
 static const BlendFactor genericALookup[11] = {
 	BlendFactor::DST_COLOR,
 	BlendFactor::ONE_MINUS_DST_COLOR,
diff --git a/GPU/Common/GPUStateUtils.h b/GPU/Common/GPUStateUtils.h
index 59e3a87d31..dffb2e4bc3 100644
--- a/GPU/Common/GPUStateUtils.h
+++ b/GPU/Common/GPUStateUtils.h
@@ -66,6 +66,7 @@ struct ViewportAndScissor {
 	bool dirtyDepth;
 };
 void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, float renderHeight, int bufferWidth, int bufferHeight, ViewportAndScissor &out);
+float ToScaledDepth(u16 z);
 
 // These are common to all modern APIs and can be easily converted with a lookup table.
 enum class BlendFactor : uint8_t {
diff --git a/GPU/Common/SoftwareTransformCommon.cpp b/GPU/Common/SoftwareTransformCommon.cpp
index f35d86e8a7..1a541e1681 100644
--- a/GPU/Common/SoftwareTransformCommon.cpp
+++ b/GPU/Common/SoftwareTransformCommon.cpp
@@ -21,11 +21,12 @@
 #include "Core/Config.h"
 #include "GPU/GPUState.h"
 #include "GPU/Math3D.h"
-#include "GPU/Common/VertexDecoderCommon.h"
-#include "GPU/Common/TransformCommon.h"
 #include "GPU/Common/FramebufferCommon.h"
-#include "GPU/Common/TextureCacheCommon.h"
+#include "GPU/Common/GPUStateUtils.h"
 #include "GPU/Common/SoftwareTransformCommon.h"
+#include "GPU/Common/TransformCommon.h"
+#include "GPU/Common/TextureCacheCommon.h"
+#include "GPU/Common/VertexDecoderCommon.h"
 
 // This is the software transform pipeline, which is necessary for supporting RECT
 // primitives correctly without geometry shaders, and may be easier to use for
@@ -406,7 +407,8 @@ void SoftwareTransform(
 	// TODO: This bleeds outside the play area in non-buffered mode. Big deal? Probably not.
 	if (maxIndex > 1 && gstate.isModeClear() && prim == GE_PRIM_RECTANGLES && IsReallyAClear(transformed, maxIndex) && gl_extensions.gpuVendor != GPU_VENDOR_POWERVR) {  // && g_Config.iRenderingMode != FB_NON_BUFFERED_MODE) {
 		result->color = transformed[0].color0_32;
-		result->depth = transformed[0].z;
+		// Need to rescale from a [0, 1] float: truncate the final transformed z to the u16 that ToScaledDepth() takes.
+		result->depth = ToScaledDepth((u16)(int)(transformed[0].z * 65535.0f));
 		result->action = SW_CLEAR;
 		return;
 	}
diff --git a/GPU/Directx9/FramebufferDX9.cpp b/GPU/Directx9/FramebufferDX9.cpp
index 63f61d3eef..9318ec0784 100644
--- a/GPU/Directx9/FramebufferDX9.cpp
+++ b/GPU/Directx9/FramebufferDX9.cpp
@@ -58,7 +58,7 @@ namespace DX9 {
 			dxstate.stencilFunc.set(D3DCMP_ALWAYS, 0, 0);
 			dxstate.stencilMask.set(0xFF);
 		}
-		pD3Ddevice->Clear(0, NULL, D3DCLEAR_STENCIL|D3DCLEAR_TARGET |D3DCLEAR_ZBUFFER, D3DCOLOR_ARGB(0, 0, 0, 0), 0, 0);
+		pD3Ddevice->Clear(0, NULL, D3DCLEAR_STENCIL|D3DCLEAR_TARGET |D3DCLEAR_ZBUFFER, D3DCOLOR_ARGB(0, 0, 0, 0), ToScaledDepth(0), 0);
 		if (keepState) {
 			dxstate.scissorTest.restore();
 			dxstate.depthWrite.restore();
diff --git a/GPU/GLES/Framebuffer.cpp b/GPU/GLES/Framebuffer.cpp
index 3fbc371f57..8ba44fcd36 100644
--- a/GPU/GLES/Framebuffer.cpp
+++ b/GPU/GLES/Framebuffer.cpp
@@ -89,10 +89,11 @@ void FramebufferManager::ClearBuffer(bool keepState) {
 	}
 	glClearColor(0.0f, 0.0f, 0.0f, 0.0f);
 	glClearStencil(0);
+	float clearDepth = ToScaledDepth(0);
 #ifdef USING_GLES2
-	glClearDepthf(0.0f);
+	glClearDepthf(clearDepth);
 #else
-	glClearDepth(0.0);
+	glClearDepth(clearDepth);
 #endif
 	glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
 	if (keepState) {
diff --git a/GPU/GLES/ShaderManager.cpp b/GPU/GLES/ShaderManager.cpp
index 322a91ffde..7676d53ed8 100644
--- a/GPU/GLES/ShaderManager.cpp
+++ b/GPU/GLES/ShaderManager.cpp
@@ -604,8 +604,8 @@ void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid) {
 		// The projection already accounts for those, so we need to reverse them.
 		//
 		// Additionally, OpenGL uses a range from [-1, 1].  So we multiply by scale and add the center.
-		viewZScale *= (1.0f / gstate_c.vpDepthScale);
-		viewZCenter -= 65535.0f * (gstate_c.vpZOffset);
+		viewZScale *= 1.0f / gstate_c.vpDepthScale;
+		viewZCenter -= 65535.0f * gstate_c.vpZOffset;
 
 		if (viewZScale != 0.0) {
 			viewZInvScale = 1.0f / viewZScale;