Merge branch 'master' into feature_openxr_vulkan

This commit is contained in:
Lubos 2022-10-09 17:49:17 +02:00
commit 43f848a2aa
22 changed files with 572 additions and 217 deletions

View File

@ -398,15 +398,3 @@ public:
#undef STATE2
extern DirectXState dxstate;
// Plain aggregate of boolean flags; the field names look like GL/GLES extension identifiers.
// NOTE(review): presumably one flag per detected extension, filled in by CheckGLExtensions()
// and read through the global gl_extensions instance - confirm against the implementation.
struct GLExtensions {
bool OES_depth24;
bool OES_packed_depth_stencil;
bool OES_depth_texture;
bool EXT_discard_framebuffer;
bool FBO_ARB;
};
extern GLExtensions gl_extensions;
void CheckGLExtensions();

View File

@ -201,11 +201,19 @@ public:
D3DTEXTUREFILTERTYPE magFilt, minFilt, mipFilt;
void Apply(LPDIRECT3DDEVICE9 device, int index) {
dxstate.texAddressU.set(wrapS);
dxstate.texAddressV.set(wrapT);
dxstate.texMagFilter.set(magFilt);
dxstate.texMinFilter.set(minFilt);
dxstate.texMipFilter.set(mipFilt);
if (index == 0) {
dxstate.texAddressU.set(wrapS);
dxstate.texAddressV.set(wrapT);
dxstate.texMagFilter.set(magFilt);
dxstate.texMinFilter.set(minFilt);
dxstate.texMipFilter.set(mipFilt);
} else {
pD3Ddevice9->SetSamplerState(index, D3DSAMP_ADDRESSU, wrapS);
pD3Ddevice9->SetSamplerState(index, D3DSAMP_ADDRESSV, wrapT);
pD3Ddevice9->SetSamplerState(index, D3DSAMP_MAGFILTER, magFilt);
pD3Ddevice9->SetSamplerState(index, D3DSAMP_MINFILTER, minFilt);
pD3Ddevice9->SetSamplerState(index, D3DSAMP_MIPFILTER, mipFilt);
}
}
};

View File

@ -1,4 +1,5 @@
#include "Common/GPU/OpenGL/DataFormatGL.h"
#include "Common/GPU/OpenGL/GLFeatures.h"
#include "Common/Log.h"
namespace Draw {
@ -15,8 +16,16 @@ bool Thin3DFormatToGLFormatAndType(DataFormat fmt, GLuint &internalFormat, GLuin
break;
case DataFormat::R8_UNORM:
internalFormat = GL_RGBA;
format = GL_RED;
if (gl_extensions.IsGLES) {
internalFormat = GL_LUMINANCE;
format = GL_LUMINANCE;
} else if (gl_extensions.VersionGEThan(3, 0)) {
internalFormat = GL_RED;
format = GL_RED;
} else {
internalFormat = GL_RGBA;
format = GL_RED;
}
type = GL_UNSIGNED_BYTE;
alignment = 1;
break;

View File

@ -848,8 +848,9 @@ VKContext::VKContext(VulkanContext *vulkan)
}
// Older ARM devices have very slow geometry shaders, not worth using. At least before 15.
if (majorVersion <= 15) {
bugs_.Infest(Bugs::GEOMETRY_SHADERS_SLOW);
// Also seen to cause weird issues on 18, so let's lump it in.
if (majorVersion <= 18) {
bugs_.Infest(Bugs::GEOMETRY_SHADERS_SLOW_OR_BROKEN);
}
}

View File

@ -333,7 +333,7 @@ public:
RASPBERRY_SHADER_COMP_HANG = 8,
MALI_CONSTANT_LOAD_BUG = 9,
SUBPASS_FEEDBACK_BROKEN = 10,
GEOMETRY_SHADERS_SLOW = 11,
GEOMETRY_SHADERS_SLOW_OR_BROKEN = 11,
MAX_BUG,
};

View File

@ -428,7 +428,7 @@ void WebSocketHLEFuncScan(DebuggerRequest &req) {
if (!Memory::IsValidRange(addr, size))
return req.Fail("Address or size outside valid memory");
bool insertSymbols = MIPSAnalyst::ScanForFunctions(addr, addr + size, true);
bool insertSymbols = MIPSAnalyst::ScanForFunctions(addr, addr + size - 1, true);
MIPSAnalyst::FinalizeScan(insertSymbols);
req.Respond();

View File

@ -691,6 +691,35 @@ static int Hook_godseaterburst_blit_texture() {
return 0;
}
// Entry hook for the game's "depthmask 5551" routine.
//
// Reads the routine's arguments straight from the MIPS registers (A1 = color buffer,
// T2 = optional depth buffer, A2 = stride in bytes, T1 = height), then asks the GPU to
// make the depth data (if any) and the color data available in emulated memory before
// the game code runs. Does nothing if the buffers are not valid VRAM ranges.
//
// Always returns 0.
static int Hook_godseaterburst_depthmask_5551() {
	// This function copies the 5551 framebuffer to a temporary, generating alpha based on depth.
	// Depth is optional, in which case all pixels get full alpha.
	// Called when your avatar changes to screenshot for save data.
	uint32_t colorBuffer = currentMIPS->r[MIPS_REG_A1];
	uint32_t depthBuffer = currentMIPS->r[MIPS_REG_T2];
	uint32_t byteStride = currentMIPS->r[MIPS_REG_A2];
	uint32_t height = currentMIPS->r[MIPS_REG_T1];
	uint32_t size = byteStride * height;

	if (!Memory::IsVRAMAddress(colorBuffer) || !Memory::IsValidRange(colorBuffer, size))
		return 0;

	if (depthBuffer != 0) {
		// Validate the depth buffer itself here; colorBuffer was already checked above.
		if (!Memory::IsVRAMAddress(depthBuffer) || !Memory::IsValidRange(depthBuffer, size))
			return 0;

		// This is added to read from the linearized mirror.
		uint32_t depthMirror = depthBuffer + 0x00200000;
		// Depth download required, or it won't work and will be transparent.
		gpu->PerformMemoryCopy(depthMirror, depthMirror, size, GPUCopyFlag::FORCE_DST_MEM | GPUCopyFlag::DEPTH_REQUESTED);
		NotifyMemInfo(MemBlockFlags::WRITE, depthMirror, size, "godseaterburst_depthmask_5551");
	}

	gpu->PerformMemoryDownload(colorBuffer, size);
	NotifyMemInfo(MemBlockFlags::WRITE, colorBuffer, size, "godseaterburst_depthmask_5551");

	return 0;
}
static int Hook_hexyzforce_monoclome_thread() {
u32 fb_info;
if (!GetMIPSStaticAddress(fb_info, -4, 0)) {
@ -1414,6 +1443,7 @@ static const ReplacementTableEntry entries[] = {
// { "vmmul_q_transp", &Replace_vmmul_q_transp, 0, REPFLAG_DISABLED },
{ "godseaterburst_blit_texture", &Hook_godseaterburst_blit_texture, 0, REPFLAG_HOOKENTER },
{ "godseaterburst_depthmask_5551", &Hook_godseaterburst_depthmask_5551, 0, REPFLAG_HOOKENTER },
{ "hexyzforce_monoclome_thread", &Hook_hexyzforce_monoclome_thread, 0, REPFLAG_HOOKENTER, 0x58 },
{ "starocean_write_stencil", &Hook_starocean_write_stencil, 0, REPFLAG_HOOKENTER, 0x260 },
{ "topx_create_saveicon", &Hook_topx_create_saveicon, 0, REPFLAG_HOOKENTER, 0x34 },

View File

@ -21,10 +21,11 @@
#include "Common/CommonTypes.h"
#include "Common/Serialize/Serializer.h"
#include "Common/Serialize/SerializeFuncs.h"
#include "Core/Debugger/MemBlockInfo.h"
#include "Core/HLE/HLE.h"
#include "Core/HLE/FunctionWrappers.h"
#include "Core/HLE/sceJpeg.h"
#include "Core/HLE/sceMpeg.h"
#include "Core/HLE/sceKernel.h"
#include "GPU/GPUCommon.h"
#include "Core/MemMap.h"
#include "Core/Reporting.h"
@ -33,6 +34,8 @@
// #define JPEG_DEBUG
#ifdef JPEG_DEBUG
#include "ext/xxhash.h"
#include "Common/File/FileUtil.h"
#include "Common/StringUtils.h"
#endif
struct u24_be {
@ -43,27 +46,47 @@ struct u24_be {
}
};
static int mjpegWidth, mjpegHeight;
static int mjpegInited = 0;
static int mjpegWidth = 0;
static int mjpegHeight = 0;
// Puts the module back into its pristine state: not inited, with no MJPEG size configured.
void __JpegInit() {
	mjpegHeight = 0;
	mjpegWidth = 0;
	mjpegInited = 0;
}
// Error codes returned by the sceJpeg HLE functions in this file (handed to hleLogError on failure paths).
enum : uint32_t {
// Zero-length input, or data the decoder could not decompress.
ERROR_JPEG_INVALID_DATA = 0x80650004,
// Chroma/colour info value that is not one of the accepted subsampling modes.
ERROR_JPEG_INVALID_COLORSPACE = 0x80650013,
// Image dimensions or stride outside the accepted limits.
ERROR_JPEG_INVALID_SIZE = 0x80650020,
// Input range is invalid or does not begin with the FF D8 start-of-image marker.
ERROR_JPEG_NO_SOI = 0x80650023,
// MJPEG create/delete/finish called in the wrong init state.
ERROR_JPEG_INVALID_STATE = 0x80650039,
// Caller-supplied YCbCr output buffer is smaller than required.
ERROR_JPEG_OUT_OF_MEMORY = 0x80650041,
// sceJpegInitMJpeg called while already inited.
ERROR_JPEG_ALREADY_INIT = 0x80650042,
// Invalid address or size argument.
ERROR_JPEG_INVALID_VALUE = 0x80650051,
};
void __JpegDoState(PointerWrap &p) {
auto s = p.Section("sceJpeg", 1);
auto s = p.Section("sceJpeg", 1, 2);
if (!s)
return;
Do(p, mjpegWidth);
Do(p, mjpegHeight);
if (s >= 2) {
Do(p, mjpegInited);
} else {
mjpegInited = -1;
}
}
// Packs a width/height pair into a single int: width shifted into the upper 16 bits,
// height OR'd into the lower bits.
static int getWidthHeight(int width, int height) {
	const int packedWidth = width << 16;
	return packedWidth | height;
}
static u32 convertYCbCrToABGR (int y, int cb, int cr) {
// TODO: sceJpegCsc and sceJpegMJpegCsc use different factors.
static u32 convertYCbCrToABGR(int y, int cb, int cr) {
//see http://en.wikipedia.org/wiki/Yuv#Y.27UV444_to_RGB888_conversion for more information.
cb = cb - 128;
cr = cr - 128;
@ -76,141 +99,258 @@ static u32 convertYCbCrToABGR (int y, int cb, int cr) {
if (g > 0xFF) g = 0xFF; if(g < 0) g = 0;
if (b > 0xFF) b = 0xFF; if(b < 0) b = 0;
return 0xFF000000 | (b << 16) | (g << 8) | (r << 0);
return (b << 16) | (g << 8) | (r << 0);
}
static void __JpegCsc(u32 imageAddr, u32 yCbCrAddr, int widthHeight, int bufferWidth) {
int height = widthHeight & 0xFFF;
int width = (widthHeight >> 16) & 0xFFF;
int lineWidth = std::min(width, bufferWidth);
int skipEndOfLine = std::max(0, bufferWidth - lineWidth);
u32_le *imageBuffer = (u32_le *)Memory::GetPointer(imageAddr);
// Converts planar YCbCr data at yCbCrAddr into 32-bit pixels written to imageAddr.
//
// imageAddr    - destination address of the 32-bit output pixels.
// yCbCrAddr    - source address; the Y plane is followed by the Cb plane, then the Cr plane.
// widthHeight  - width in the upper 16 bits, height in the lower 16 bits (a height of 0 is processed as 1).
// bufferWidth  - destination stride in pixels; negative values are treated as 0.
// chroma       - subsampling selector; only 0x00020202, 0x00020201 and 0x00020101 (low 20 bits) are accepted.
// usec         - incremented by a rough timing estimate once all validation has passed.
//
// Returns the hleLog* result: ERROR_JPEG_INVALID_COLORSPACE / ERROR_JPEG_INVALID_VALUE on failure,
// -1 if the packed height was 0, otherwise 0.
static int JpegCsc(u32 imageAddr, u32 yCbCrAddr, int widthHeight, int bufferWidth, uint32_t chroma, int &usec) {
	if ((chroma & 0x000FFFFF) != 0x00020202 && (chroma & 0x000FFFFF) != 0x00020201 && (chroma & 0x000FFFFF) != 0x00020101)
		return hleLogError(ME, ERROR_JPEG_INVALID_COLORSPACE, "invalid colorspace");

	if (bufferWidth < 0)
		bufferWidth = 0;
	int height = widthHeight & 0xFFFF;
	int width = (widthHeight >> 16) & 0xFFFF;
	if (height == 0)
		height = 1;

	// For the accepted chroma values these evaluate to 0 or 1 (no subsampling / halved).
	uint8_t widthShift = ((chroma >> 8) & 0x03) - 1;
	uint8_t heightShift = (chroma & 0x03) - 1;

	// Size math is done in 64 bits: width and height can each be as large as 0xFFFF, so
	// their product does not fit in an int, and a wrapped value could slip past the range checks.
	const uint64_t sizeY64 = (uint64_t)width * (uint64_t)height;

	uint64_t destSize = ((uint64_t)bufferWidth * (height - 1) + width) * 4;
	if (destSize > 0x3FFFFFFF || !Memory::IsValidRange(imageAddr, (uint32_t)destSize))
		return hleLogError(ME, ERROR_JPEG_INVALID_VALUE, "invalid dest address or size");

	if (sizeY64 > 0x3FFFFFFF)
		return hleLogError(ME, ERROR_JPEG_INVALID_VALUE, "invalid src address or size");
	// Safe to narrow now: sizeY <= 0x3FFFFFFF, and sizeCb <= sizeY, so the total fits in 32 bits unsigned.
	const int sizeY = (int)sizeY64;
	const int sizeCb = sizeY >> (widthShift + heightShift);
	const uint32_t srcSize = (uint32_t)sizeY + 2 * (uint32_t)sizeCb;
	if (!Memory::IsValidRange(yCbCrAddr, srcSize))
		return hleLogError(ME, ERROR_JPEG_INVALID_VALUE, "invalid src address or size");

	u32_le *imageBuffer = (u32_le *)Memory::GetPointerWriteUnchecked(imageAddr);
	const u8 *Y = (const u8 *)Memory::GetPointerUnchecked(yCbCrAddr);
	const u8 *Cb = Y + sizeY;
	const u8 *Cr = Cb + sizeCb;

	// Very approximate estimate based on tests on a PSP. Usually under.
	usec += 60 + 6 * height + width / 2 + width / 4;

	if ((widthHeight & 0x00010001) == 0 && height > 1) {
		// Both dimensions even: handle 2x2 pixel groups, sharing one chroma sample per group.
		for (int y = 0; y < height; y += 2) {
			for (int x = 0; x < width; x += 2) {
				u8 y0 = Y[width * y + x];
				u8 y1 = Y[width * y + x + 1];
				u8 y2 = Y[width * (y + 1) + x];
				u8 y3 = Y[width * (y + 1) + x + 1];
				u8 cb = Cb[(width >> widthShift) * (y >> heightShift) + (x >> widthShift)];
				u8 cr = Cr[(width >> widthShift) * (y >> heightShift) + (x >> widthShift)];

				imageBuffer[bufferWidth * y + x] = convertYCbCrToABGR(y0, cb, cr);
				imageBuffer[bufferWidth * y + x + 1] = convertYCbCrToABGR(y1, cb, cr);
				imageBuffer[bufferWidth * (y + 1) + x] = convertYCbCrToABGR(y2, cb, cr);
				imageBuffer[bufferWidth * (y + 1) + x + 1] = convertYCbCrToABGR(y3, cb, cr);
			}
		}
	} else {
		// Odd width or height (or a single row): convert pixel by pixel.
		for (int y = 0; y < height; ++y) {
			for (int x = 0; x < width; ++x) {
				u8 yy = Y[width * y + x];
				u8 cb = Cb[(width >> widthShift) * (y >> heightShift) + (x >> widthShift)];
				u8 cr = Cr[(width >> widthShift) * (y >> heightShift) + (x >> widthShift)];

				imageBuffer[bufferWidth * y + x] = convertYCbCrToABGR(yy, cb, cr);
			}
		}
	}

	NotifyMemInfo(MemBlockFlags::READ, yCbCrAddr, srcSize, "JpegCsc");
	NotifyMemInfo(MemBlockFlags::WRITE, imageAddr, (uint32_t)destSize, "JpegCsc");

	// A packed height of 0 was processed as a single row above, but is reported as -1.
	if ((widthHeight & 0xFFFF) == 0)
		return hleLogSuccessI(ME, -1);
	return hleLogSuccessI(ME, 0);
}
static int JpegMJpegCsc(u32 imageAddr, u32 yCbCrAddr, int widthHeight, int bufferWidth, int &usec) {
int height = widthHeight & 0xFFFF;
int width = (widthHeight >> 16) & 0xFFFF;
if (bufferWidth < 0)
bufferWidth = bufferWidth > -901 ? 901 + bufferWidth : 0;
if (height == 0)
height = 1;
int sizeY = width * height;
int sizeCb = sizeY >> 2;
u8 *Y = (u8*)Memory::GetPointer(yCbCrAddr);
u8 *Cb = Y + sizeY;
u8 *Cr = Cb + sizeCb;
for (int y = 0; y < height; ++y) {
for (int x = 0; x < width; x += 4) {
u8 y0 = Y[x + 0];
u8 y1 = Y[x + 1];
u8 y2 = Y[x + 2];
u8 y3 = Y[x + 3];
u8 cb = *Cb++;
u8 cr = *Cr++;
if (width > 720 || height > 480)
return hleLogError(ME, ERROR_JPEG_INVALID_SIZE, "invalid size, max 720x480");
if (bufferWidth > 1024)
return hleLogError(ME, ERROR_JPEG_INVALID_SIZE, "invalid stride, max 1024");
uint32_t destSize = (bufferWidth * (height - 1) + width) * 4;
if (!Memory::IsValidRange(imageAddr, destSize))
return hleLogError(ME, SCE_KERNEL_ERROR_INVALID_POINTER, "invalid dest address or size");
// Convert to ABGR. This is not a fast way to do it.
u32 abgr0 = convertYCbCrToABGR(y0, cb, cr);
u32 abgr1 = convertYCbCrToABGR(y1, cb, cr);
u32 abgr2 = convertYCbCrToABGR(y2, cb, cr);
u32 abgr3 = convertYCbCrToABGR(y3, cb, cr);
u32_le *imageBuffer = (u32_le *)Memory::GetPointerWriteUnchecked(imageAddr);
const u8 *Y = (const u8 *)Memory::GetPointerUnchecked(yCbCrAddr);
const u8 *Cb = Y + sizeY;
const u8 *Cr = Cb + sizeCb;
// Write ABGR
imageBuffer[x + 0] = abgr0;
imageBuffer[x + 1] = abgr1;
imageBuffer[x + 2] = abgr2;
imageBuffer[x + 3] = abgr3;
}
Y += width;
imageBuffer += width;
imageBuffer += skipEndOfLine;
// Very approximate estimate based on tests on a PSP. Usually under.
// The PSP behaves strangely for heights below 16, not rescheduling and writing fewer bytes.
if (height >= 16) {
usec += 9 * height;
}
if (!Memory::IsValidRange(yCbCrAddr, sizeY + sizeCb + sizeCb)) {
// Seems to write based on zeros? Maybe reuses some other value?
for (int y = 0; y < height; ++y) {
for (int x = 0; x < width; ++x) {
imageBuffer[bufferWidth * y + x] = convertYCbCrToABGR(0, 0, 0);
}
}
} else if ((widthHeight & 0x00010001) == 0 && height > 1) {
for (int y = 0; y < height; y += 2) {
for (int x = 0; x < width; x += 2) {
u8 y0 = Y[width * y + x];
u8 y1 = Y[width * y + x + 1];
u8 y2 = Y[width * (y + 1) + x];
u8 y3 = Y[width * (y + 1) + x + 1];
u8 cb = Cb[(width >> 1) * (y >> 1) + (x >> 1)];
u8 cr = Cr[(width >> 1) * (y >> 1) + (x >> 1)];
imageBuffer[bufferWidth * y + x] = convertYCbCrToABGR(y0, cb, cr);
imageBuffer[bufferWidth * y + x + 1] = convertYCbCrToABGR(y1, cb, cr);
imageBuffer[bufferWidth * (y + 1) + x] = convertYCbCrToABGR(y2, cb, cr);
imageBuffer[bufferWidth * (y + 1) + x + 1] = convertYCbCrToABGR(y3, cb, cr);
}
}
NotifyMemInfo(MemBlockFlags::READ, yCbCrAddr, sizeY + sizeCb + sizeCb, "JpegMJpegCsc");
} else {
for (int y = 0; y < height; ++y) {
for (int x = 0; x < width; ++x) {
u8 yy = Y[width * y + x];
u8 cb = Cb[(width >> 1) * (y >> 1) + (x >> 1)];
u8 cr = Cr[(width >> 1) * (y >> 1) + (x >> 1)];
imageBuffer[bufferWidth * y + x] = convertYCbCrToABGR(yy, cb, cr);
}
}
NotifyMemInfo(MemBlockFlags::READ, yCbCrAddr, sizeY + sizeCb + sizeCb, "JpegMJpegCsc");
}
NotifyMemInfo(MemBlockFlags::WRITE, imageAddr, destSize, "JpegMJpegCsc");
return hleLogSuccessI(ME, 0);
}
static int sceJpegMJpegCsc(u32 imageAddr, u32 yCbCrAddr, int widthHeight, int bufferWidth) {
__JpegCsc(imageAddr, yCbCrAddr, widthHeight, bufferWidth);
if (mjpegInited == 0)
return hleLogError(ME, 0x80000001, "not yet inited");
int usec = 0;
int result = JpegMJpegCsc(imageAddr, yCbCrAddr, widthHeight, bufferWidth, usec);
int width = (widthHeight >> 16) & 0xFFF;
int height = widthHeight & 0xFFF;
DEBUG_LOG(ME, "sceJpegMJpegCsc(%08x, %08x, (%dx%d), %i)", imageAddr, yCbCrAddr, width, height, bufferWidth);
gpu->NotifyVideoUpload(imageAddr, width * height * 4, width, GE_FORMAT_8888);
return 0;
if (result >= 0)
gpu->NotifyVideoUpload(imageAddr, width * height * 4, width, GE_FORMAT_8888);
if (usec != 0)
return hleDelayResult(result, "jpeg csc", usec);
return result;
}
// Swaps bits 0-7 with bits 16-23 of a 32-bit pixel, leaving bits 8-15 and 24-31 in place.
static u32 convertARGBtoABGR(u32 argb) {
	const u32 untouched = argb & 0xFF00FF00;
	const u32 lowToHigh = (argb & 0x000000FF) << 16;
	const u32 highToLow = (argb & 0x00FF0000) >> 16;
	return untouched | lowToHigh | highToLow;
}
static int __DecodeJpeg(u32 jpegAddr, int jpegSize, u32 imageAddr) {
const u8 *buf = Memory::GetPointer(jpegAddr);
static int DecodeJpeg(u32 jpegAddr, int jpegSize, u32 imageAddr, int &usec) {
if (!Memory::IsValidRange(jpegAddr, jpegSize))
return hleLogError(ME, ERROR_JPEG_NO_SOI, "invalid jpeg address");
if (jpegSize == 0)
return hleLogError(ME, ERROR_JPEG_INVALID_DATA, "invalid jpeg data");
NotifyMemInfo(MemBlockFlags::READ, jpegAddr, jpegSize, "JpegDecodeMJpeg");
const u8 *buf = Memory::GetPointerUnchecked(jpegAddr);
if (jpegSize < 2 || buf[0] != 0xFF || buf[1] != 0xD8)
return hleLogError(ME, ERROR_JPEG_NO_SOI, "no SOI found, invalid data");
int width, height, actual_components;
unsigned char *jpegBuf = jpgd::decompress_jpeg_image_from_memory(buf, jpegSize, &width, &height, &actual_components, 3);
if (actual_components != 3) {
if (actual_components != 3 && actual_components != 1) {
// The assumption that the image was RGB was wrong...
// Try again.
int components = actual_components;
jpegBuf = jpgd::decompress_jpeg_image_from_memory(buf, jpegSize, &width, &height, &actual_components, components);
}
if (jpegBuf == NULL) {
return getWidthHeight(0, 0);
if (jpegBuf == nullptr) {
return hleLogError(ME, ERROR_JPEG_INVALID_DATA, "unable to decompress jpeg");
}
if (actual_components == 3) {
u24_be *imageBuffer = (u24_be*)jpegBuf;
u32_le *abgr = (u32_le *)Memory::GetPointer(imageAddr);
int pspWidth = 0;
for (int w = 2; w <= 4096; w *= 2) {
if (w >= width && w >= height) {
pspWidth = w;
break;
}
}
for (int y = 0; y < height; ++y) {
for (int x = 0; x < width; x++) {
abgr[x] = convertARGBtoABGR(imageBuffer[x]);
}
imageBuffer += width;
abgr += pspWidth; // Smallest value power of 2 fitting width and height(needs to be square!)
usec += (width * height) / 14;
if (!Memory::IsValidRange(imageAddr, mjpegWidth * mjpegHeight * 4)) {
free(jpegBuf);
return hleLogError(ME, SCE_KERNEL_ERROR_INVALID_POINTER, "invalid output address");
}
// Note: even if you Delete, the size is still allowed.
if (width > mjpegWidth || height > mjpegHeight) {
free(jpegBuf);
return hleLogError(ME, ERROR_JPEG_INVALID_SIZE, "invalid output address");
}
if (mjpegInited == 0) {
// If you finish after setting the size, then call this - you get an interesting error.
free(jpegBuf);
return hleLogError(ME, 0x80000001, "mjpeg not inited");
}
usec += (width * height) / 110;
if (actual_components == 3 || actual_components == 1) {
u24_be *imageBuffer = (u24_be*)jpegBuf;
u32_le *abgr = (u32_le *)Memory::GetPointerUnchecked(imageAddr);
for (int y = 0; y < height; ++y) {
for (int x = 0; x < width; x++) {
abgr[x] = convertARGBtoABGR(imageBuffer[x]);
}
imageBuffer += width;
abgr += mjpegWidth;
}
NotifyMemInfo(MemBlockFlags::WRITE, imageAddr, mjpegWidth * height, "JpegDecodeMJpeg");
}
free(jpegBuf);
return getWidthHeight(width, height);
return hleLogSuccessX(ME, getWidthHeight(width, height));
}
static int sceJpegDecodeMJpeg(u32 jpegAddr, int jpegSize, u32 imageAddr, int dhtMode) {
if (!Memory::IsValidAddress(jpegAddr)) {
ERROR_LOG(ME, "sceJpegDecodeMJpeg: Bad JPEG address 0x%08x", jpegAddr);
return 0;
}
if ((jpegAddr | jpegSize | (jpegAddr + jpegSize)) & 0x80000000)
return hleLogError(ME, SCE_KERNEL_ERROR_PRIV_REQUIRED, "invalid jpeg address");
if (imageAddr & 0x80000000)
return hleLogError(ME, SCE_KERNEL_ERROR_PRIV_REQUIRED, "invalid output address");
DEBUG_LOG(ME, "sceJpegDecodeMJpeg(%08x, %i, %08x, %i)", jpegAddr, jpegSize, imageAddr, dhtMode);
return __DecodeJpeg(jpegAddr, jpegSize, imageAddr);
}
static int sceJpegDeleteMJpeg() {
WARN_LOG(ME, "sceJpegDeleteMJpeg()");
return 0;
int usec = 300;
int result = DecodeJpeg(jpegAddr, jpegSize, imageAddr, usec);
return hleDelayResult(result, "jpeg decode", usec);
}
static int sceJpegDecodeMJpegSuccessively(u32 jpegAddr, int jpegSize, u32 imageAddr, int dhtMode) {
if (!Memory::IsValidAddress(jpegAddr)) {
ERROR_LOG(ME, "sceJpegDecodeMJpegSuccessively: Bad JPEG address 0x%08x", jpegAddr);
return 0;
}
if ((jpegAddr | jpegSize | (jpegAddr + jpegSize)) & 0x80000000)
return hleLogError(ME, SCE_KERNEL_ERROR_PRIV_REQUIRED, "invalid jpeg address");
if (imageAddr & 0x80000000)
return hleLogError(ME, SCE_KERNEL_ERROR_PRIV_REQUIRED, "invalid output address");
DEBUG_LOG(ME, "sceJpegDecodeMJpegSuccessively(%08x, %i, %08x, %i)", jpegAddr, jpegSize, imageAddr, dhtMode);
return __DecodeJpeg(jpegAddr, jpegSize, imageAddr);
int usec = 300;
int result = DecodeJpeg(jpegAddr, jpegSize, imageAddr, usec);
return hleDelayResult(result, "jpeg decode", usec);
}
static int sceJpegCsc(u32 imageAddr, u32 yCbCrAddr, int widthHeight, int bufferWidth, int colourInfo) {
if (bufferWidth < 0 || widthHeight < 0){
WARN_LOG(ME, "sceJpegCsc(%08x, %08x, %i, %i, %i)", imageAddr, yCbCrAddr, widthHeight, bufferWidth, colourInfo);
return ERROR_JPEG_INVALID_VALUE;
}
__JpegCsc(imageAddr, yCbCrAddr, widthHeight, bufferWidth);
DEBUG_LOG(ME, "sceJpegCsc(%08x, %08x, %i, %i, %i)", imageAddr, yCbCrAddr, widthHeight, bufferWidth, colourInfo);
return 0;
}
static int sceJpegFinishMJpeg() {
WARN_LOG(ME, "sceJpegFinishMJpeg()");
return 0;
int usec = 0;
int result = JpegCsc(imageAddr, yCbCrAddr, widthHeight, bufferWidth, colourInfo, usec);
if (usec != 0)
return hleDelayResult(result, "jpeg csc", usec);
return result;
}
static int getYCbCrBufferSize(int w, int h) {
@ -218,21 +358,30 @@ static int getYCbCrBufferSize(int w, int h) {
return ((w * h) >> 1) * 3;
}
static int __JpegGetOutputInfo(u32 jpegAddr, int jpegSize, u32 colourInfoAddr) {
const u8 *buf = Memory::GetPointer(jpegAddr);
static int JpegGetOutputInfo(u32 jpegAddr, int jpegSize, u32 colourInfoAddr) {
if (!Memory::IsValidRange(jpegAddr, jpegSize))
return hleLogError(ME, ERROR_JPEG_NO_SOI, "invalid jpeg address");
if (jpegSize == 0)
return hleLogError(ME, ERROR_JPEG_INVALID_DATA, "invalid jpeg data");
NotifyMemInfo(MemBlockFlags::READ, jpegAddr, jpegSize, "JpegGetOutputInfo");
const u8 *buf = Memory::GetPointerUnchecked(jpegAddr);
if (jpegSize < 2 || buf[0] != 0xFF || buf[1] != 0xD8)
return hleLogError(ME, ERROR_JPEG_NO_SOI, "no SOI found, invalid data");
int width, height, actual_components;
unsigned char *jpegBuf = jpgd::decompress_jpeg_image_from_memory(buf, jpegSize, &width, &height, &actual_components, 3);
if (actual_components != 3) {
// The assumption that the image was RGB was wrong...
// Try again.
if (actual_components != 3 && actual_components != 1) {
// The assumption that the image was RGB was wrong...
// Try again.
int components = actual_components;
jpegBuf = jpgd::decompress_jpeg_image_from_memory(buf, jpegSize, &width, &height, &actual_components, components);
}
if (jpegBuf == NULL) {
ERROR_LOG(ME, "sceJpegGetOutputInfo: Bad JPEG data");
return getYCbCrBufferSize(0, 0);
if (jpegBuf == nullptr) {
return hleLogError(ME, ERROR_JPEG_INVALID_DATA, "unable to decompress jpeg");
}
free(jpegBuf);
@ -243,34 +392,36 @@ static int __JpegGetOutputInfo(u32 jpegAddr, int jpegSize, u32 colourInfoAddr) {
// - Bits 8 to 16 (Vertical chroma subsampling value): 0x00, 0x01 or 0x02
// - Bits 0 to 8 (Horizontal chroma subsampling value): 0x00, 0x01 or 0x02
if (Memory::IsValidAddress(colourInfoAddr)) {
// Note: can't actually seem to get any other subsampling values or color modes to work on a PSP.
Memory::Write_U32(0x00020202, colourInfoAddr);
NotifyMemInfo(MemBlockFlags::WRITE, colourInfoAddr, 4, "JpegGetOutputInfo");
}
#ifdef JPEG_DEBUG
char jpeg_fname[256];
u8 *jpegDumpBuf = Memory::GetPointer(jpegAddr);
const u8 *jpegDumpBuf = Memory::GetPointer(jpegAddr);
u32 jpeg_xxhash = XXH32((const char *)jpegDumpBuf, jpegSize, 0xC0108888);
sprintf(jpeg_fname, "Jpeg\\%X.jpg", jpeg_xxhash);
FILE *wfp = fopen(jpeg_fname, "wb");
Path jpegDir("Jpeg");
Path jpegFile = jpegDir / StringFromFormat("%X.jpg", jpeg_xxhash);
FILE *wfp = File::OpenCFile(jpegFile, "wb");
if (!wfp) {
_wmkdir(L"Jpeg\\");
wfp = fopen(jpeg_fname, "wb");
File::CreateDir(jpegDir);
wfp = File::OpenCFile(jpegFile, "wb");
}
fwrite(jpegDumpBuf, 1, jpegSize, wfp);
fclose(wfp);
#endif //JPEG_DEBUG
return getYCbCrBufferSize(width, height);
return hleLogSuccessX(ME, getYCbCrBufferSize(width, height));
}
static int sceJpegGetOutputInfo(u32 jpegAddr, int jpegSize, u32 colourInfoAddr, int dhtMode) {
if (!Memory::IsValidAddress(jpegAddr)) {
ERROR_LOG(ME, "sceJpegGetOutputInfo: Bad JPEG address 0x%08x", jpegAddr);
return getYCbCrBufferSize(0, 0);
}
if ((jpegAddr | jpegSize | (jpegAddr + jpegSize)) & 0x80000000)
return hleLogError(ME, SCE_KERNEL_ERROR_PRIV_REQUIRED, "invalid jpeg address");
DEBUG_LOG(ME, "sceJpegGetOutputInfo(%08x, %i, %08x, %i)", jpegAddr, jpegSize, colourInfoAddr, dhtMode);
return __JpegGetOutputInfo(jpegAddr, jpegSize, colourInfoAddr);
int result = JpegGetOutputInfo(jpegAddr, jpegSize, colourInfoAddr);
// Time taken varies a bit, this is the low end. Depends on data.
// Note that errors delay as well.
return hleDelayResult(result, "jpeg get output info", 250);
}
static u32 convertRGBToYCbCr(u32 rgb) {
@ -290,86 +441,124 @@ static u32 convertRGBToYCbCr(u32 rgb) {
return (y << 16) | (cb << 8) | cr;
}
static int __JpegConvertRGBToYCbCr (const void *data, u32 bufferOutputAddr, int width, int height) {
u24_be *imageBuffer = (u24_be*)data;
static int JpegConvertRGBToYCbCr(const void *data, u8 *output, int width, int height) {
u24_be *imageBuffer = (u24_be *)data;
int sizeY = width * height;
int sizeCb = sizeY >> 2;
u8 *Y = (u8*)Memory::GetPointer(bufferOutputAddr);
u8 *Y = output;
u8 *Cb = Y + sizeY;
u8 *Cr = Cb + sizeCb;
for (int y = 0; y < height; ++y) {
for (int x = 0; x < width; x += 4) {
u32 abgr0 = imageBuffer[x + 0];
u32 abgr1 = imageBuffer[x + 1];
u32 abgr2 = imageBuffer[x + 2];
u32 abgr3 = imageBuffer[x + 3];
if ((width & 1) == 0 && (height & 1) == 0) {
for (int y = 0; y < height; y += 2) {
for (int x = 0; x < width; x += 2) {
u32 rgb0 = imageBuffer[width * y + x];
u32 rgb1 = imageBuffer[width * y + x + 1];
u32 rgb2 = imageBuffer[width * (y + 1) + x];
u32 rgb3 = imageBuffer[width * (y + 1) + x + 1];
u32 yCbCr0 = convertRGBToYCbCr(abgr0);
u32 yCbCr1 = convertRGBToYCbCr(abgr1);
u32 yCbCr2 = convertRGBToYCbCr(abgr2);
u32 yCbCr3 = convertRGBToYCbCr(abgr3);
u32 yCbCr0 = convertRGBToYCbCr(rgb0);
u32 yCbCr1 = convertRGBToYCbCr(rgb1);
u32 yCbCr2 = convertRGBToYCbCr(rgb2);
u32 yCbCr3 = convertRGBToYCbCr(rgb3);
Y[x + 0] = (yCbCr0 >> 16) & 0xFF;
Y[x + 1] = (yCbCr1 >> 16) & 0xFF;
Y[x + 2] = (yCbCr2 >> 16) & 0xFF;
Y[x + 3] = (yCbCr3 >> 16) & 0xFF;
*Cb++ = (yCbCr0 >> 8) & 0xFF;
*Cr++ = yCbCr0 & 0xFF;
Y[width * y + x] = (yCbCr0 >> 16) & 0xFF;
Y[width * y + x + 1] = (yCbCr1 >> 16) & 0xFF;
Y[width * (y + 1) + x] = (yCbCr2 >> 16) & 0xFF;
Y[width * (y + 1) + x + 1] = (yCbCr3 >> 16) & 0xFF;
Cb[(width >> 1) * (y >> 1) + (x >> 1)] = (yCbCr0 >> 8) & 0xFF;
Cr[(width >> 1) * (y >> 1) + (x >> 1)] = yCbCr0 & 0xFF;
}
}
} else {
for (int y = 0; y < height; ++y) {
for (int x = 0; x < width; ++x) {
u32 rgb = imageBuffer[width * y + x];
u32 yCbCr = convertRGBToYCbCr(rgb);
Y[width * y + x] = (yCbCr >> 16) & 0xFF;
if ((y & 1) == 0 && (x & 1) == 0) {
// Ideally, would average, but I suppose these just came from a JPEG, so they ought to match.
Cb[(width >> 1) * (y >> 1) + (x >> 1)] = (yCbCr >> 8) & 0xFF;
Cr[(width >> 1) * (y >> 1) + (x >> 1)] = yCbCr & 0xFF;
}
}
}
imageBuffer += width;
Y += width ;
}
return getWidthHeight(width, height);
}
static int __JpegDecodeMJpegYCbCr(u32 jpegAddr, int jpegSize, u32 yCbCrAddr) {
const u8 *buf = Memory::GetPointer(jpegAddr);
static int JpegDecodeMJpegYCbCr(u32 jpegAddr, int jpegSize, u32 yCbCrAddr, int yCbCrSize, int &usec) {
if (!Memory::IsValidRange(jpegAddr, jpegSize))
return hleLogError(ME, ERROR_JPEG_NO_SOI, "invalid jpeg address");
if (jpegSize == 0)
return hleLogError(ME, ERROR_JPEG_INVALID_DATA, "invalid jpeg data");
NotifyMemInfo(MemBlockFlags::READ, jpegAddr, jpegSize, "JpegDecodeMJpegYCbCr");
const u8 *buf = Memory::GetPointerUnchecked(jpegAddr);
if (jpegSize < 2 || buf[0] != 0xFF || buf[1] != 0xD8)
return hleLogError(ME, ERROR_JPEG_NO_SOI, "no SOI found, invalid data");
int width, height, actual_components;
unsigned char *jpegBuf = jpgd::decompress_jpeg_image_from_memory(buf, jpegSize, &width, &height, &actual_components, 3);
if (actual_components != 3) {
if (actual_components != 3 && actual_components != 1) {
// The assumption that the image was RGB was wrong...
// Try again.
int components = actual_components;
jpegBuf = jpgd::decompress_jpeg_image_from_memory(buf, jpegSize, &width, &height, &actual_components, components);
}
if (jpegBuf == NULL) {
return getWidthHeight(0, 0);
if (jpegBuf == nullptr) {
return hleLogError(ME, ERROR_JPEG_INVALID_DATA, "unable to decompress jpeg");
}
if (actual_components == 3) {
__JpegConvertRGBToYCbCr(jpegBuf, yCbCrAddr, width, height);
if (yCbCrSize < getYCbCrBufferSize(width, height)) {
free(jpegBuf);
return hleLogError(ME, ERROR_JPEG_OUT_OF_MEMORY, "buffer not large enough");
}
// Technically, it seems like the PSP doesn't support grayscale, but we might as well.
if (actual_components == 3 || actual_components == 1) {
if (Memory::IsValidRange(yCbCrAddr, getYCbCrBufferSize(width, height))) {
JpegConvertRGBToYCbCr(jpegBuf, Memory::GetPointerWriteUnchecked(yCbCrAddr), width, height);
NotifyMemInfo(MemBlockFlags::WRITE, yCbCrAddr, getYCbCrBufferSize(width, height), "JpegDecodeMJpegYCbCr");
} else {
// There's some weird behavior on the PSP where it writes data around the last passed address.
WARN_LOG_REPORT(ME, "JpegDecodeMJpegYCbCr: Invalid output address (%08x / %08x) for %dx%d", yCbCrAddr, yCbCrSize, width, height);
}
}
free(jpegBuf);
// TODO: There's more...
return getWidthHeight(width, height);
// Rough estimate based on observed timing.
usec += (width * height) / 14;
return hleLogSuccessX(ME, getWidthHeight(width, height));
}
static int sceJpegDecodeMJpegYCbCr(u32 jpegAddr, int jpegSize, u32 yCbCrAddr, int yCbCrSize, int dhtMode) {
if (!Memory::IsValidAddress(jpegAddr)) {
ERROR_LOG(ME, "sceJpegDecodeMJpegYCbCr: Bad JPEG address 0x%08x", jpegAddr);
return getWidthHeight(0, 0);
}
if ((jpegAddr | jpegSize | (jpegAddr + jpegSize)) & 0x80000000)
return hleLogError(ME, SCE_KERNEL_ERROR_PRIV_REQUIRED, "invalid jpeg address");
if ((yCbCrAddr | yCbCrSize | (yCbCrAddr + yCbCrSize)) & 0x80000000)
return hleLogError(ME, SCE_KERNEL_ERROR_PRIV_REQUIRED, "invalid output address");
if (!Memory::IsValidRange(jpegAddr, jpegSize))
return hleLogError(ME, ERROR_JPEG_INVALID_VALUE, "invalid jpeg address");
DEBUG_LOG(ME, "sceJpegDecodeMJpegYCbCr(%08x, %i, %08x, %i, %i)", jpegAddr, jpegSize, yCbCrAddr, yCbCrSize, dhtMode);
return __JpegDecodeMJpegYCbCr(jpegAddr, jpegSize, yCbCrAddr);
int usec = 300;
int result = JpegDecodeMJpegYCbCr(jpegAddr, jpegSize, yCbCrAddr, yCbCrSize, usec);
return hleDelayResult(result, "jpeg decode", usec);
}
static int sceJpegDecodeMJpegYCbCrSuccessively(u32 jpegAddr, int jpegSize, u32 yCbCrAddr, int yCbCrSize, int dhtMode) {
if (!Memory::IsValidAddress(jpegAddr)) {
ERROR_LOG(ME, "sceJpegDecodeMJpegYCbCrSuccessively: Bad JPEG address 0x%08x", jpegAddr);
return getWidthHeight(0, 0);
}
if ((jpegAddr | jpegSize | (jpegAddr + jpegSize)) & 0x80000000)
return hleLogError(ME, SCE_KERNEL_ERROR_PRIV_REQUIRED, "invalid jpeg address");
if ((yCbCrAddr | yCbCrSize | (yCbCrAddr + yCbCrSize)) & 0x80000000)
return hleLogError(ME, SCE_KERNEL_ERROR_PRIV_REQUIRED, "invalid output address");
DEBUG_LOG(ME, "sceJpegDecodeMJpegYCbCrSuccessively(%08x, %i, %08x, %i, %i)", jpegAddr, jpegSize, yCbCrAddr, yCbCrSize, dhtMode);
// Do as same way as sceJpegDecodeMJpegYCbCr() but with smaller block size
return __JpegDecodeMJpegYCbCr(jpegAddr, jpegSize, yCbCrAddr);
int usec = 300;
int result = JpegDecodeMJpegYCbCr(jpegAddr, jpegSize, yCbCrAddr, yCbCrSize, usec);
return hleDelayResult(result, "jpeg decode", usec);
}
static int sceJpeg_9B36444C() {
@ -378,16 +567,49 @@ static int sceJpeg_9B36444C() {
}
static int sceJpegCreateMJpeg(int width, int height) {
if (mjpegInited == 0)
return hleLogError(ME, ERROR_JPEG_INVALID_STATE, "not yet inited");
if (mjpegInited == 2)
return hleLogError(ME, ERROR_JPEG_INVALID_STATE, "already created");
if (width > 1024)
return hleLogError(ME, ERROR_JPEG_INVALID_SIZE, "width outside bounds");
mjpegInited = 2;
mjpegWidth = width;
mjpegHeight = height;
INFO_LOG(ME, "sceJpegCreateMJpeg(%i, %i)", width, height);
return 0;
return hleLogSuccessInfoI(ME, 0);
}
// Leaves the "created" state and returns to the plain "inited" state (1).
// Fails with ERROR_JPEG_INVALID_STATE when the module is not inited (0) or nothing was created (1).
static int sceJpegDeleteMJpeg() {
	switch (mjpegInited) {
	case 0:
		return hleLogError(ME, ERROR_JPEG_INVALID_STATE, "not yet inited");
	case 1:
		return hleLogError(ME, ERROR_JPEG_INVALID_STATE, "not yet created");
	default:
		break;
	}

	mjpegInited = 1;
	return hleLogSuccessInfoI(ME, 0);
}
static int sceJpegInitMJpeg() {
WARN_LOG(ME, "sceJpegInitMJpeg()");
return 0;
if (mjpegInited == 1 || mjpegInited == 2)
return hleLogError(ME, ERROR_JPEG_ALREADY_INIT, "already inited");
// If it was -1, it's from an old save state, avoid double init error but assume inited.
if (mjpegInited == 0)
mjpegInited = 1;
return hleLogSuccessI(ME, hleDelayResult(0, "mjpeg init", 130));
}
// Shuts the MJPEG module down, returning it to the not-inited state (0).
// Fails with ERROR_JPEG_INVALID_STATE if the module was never inited (0) or an MJPEG
// context is still created (2). On success the result is delayed by 120 us.
static int sceJpegFinishMJpeg() {
	// State 0 means init has not happened, so report it the same way the other entry points do.
	if (mjpegInited == 0)
		return hleLogError(ME, ERROR_JPEG_INVALID_STATE, "not yet inited");
	if (mjpegInited == 2)
		return hleLogError(ME, ERROR_JPEG_INVALID_STATE, "mjpeg not deleted");

	// Even from an old save state, if we see this we leave compat mode.
	mjpegInited = 0;
	return hleLogSuccessI(ME, hleDelayResult(0, "mjpeg finish", 120));
}
static int sceJpegMJpegCscWithColorOption() {
@ -400,18 +622,25 @@ static int sceJpegDecompressAllImage() {
return 0;
}
// Load-status callback: a state of -1 (unload) resets the module via __JpegInit();
// every other state is ignored.
void JpegNotifyLoadStatus(int state) {
	if (state != -1)
		return;

	// Reset our state on unload.
	__JpegInit();
}
const HLEFunction sceJpeg[] =
{
{0X0425B986, &WrapI_V<sceJpegDecompressAllImage>, "sceJpegDecompressAllImage", 'i', "" },
{0X04B5AE02, &WrapI_UUII<sceJpegMJpegCsc>, "sceJpegMJpegCsc", 'i', "xxii" },
{0X04B93CEF, &WrapI_UIUI<sceJpegDecodeMJpeg>, "sceJpegDecodeMJpeg", 'i', "xixi" },
{0X227662D7, &WrapI_UIUII<sceJpegDecodeMJpegYCbCrSuccessively>, "sceJpegDecodeMJpegYCbCrSuccessively", 'i', "xixii"},
{0X04B5AE02, &WrapI_UUII<sceJpegMJpegCsc>, "sceJpegMJpegCsc", 'i', "xxxi" },
{0X04B93CEF, &WrapI_UIUI<sceJpegDecodeMJpeg>, "sceJpegDecodeMJpeg", 'x', "xixi" },
{0X227662D7, &WrapI_UIUII<sceJpegDecodeMJpegYCbCrSuccessively>, "sceJpegDecodeMJpegYCbCrSuccessively", 'x', "xixii"},
{0X48B602B7, &WrapI_V<sceJpegDeleteMJpeg>, "sceJpegDeleteMJpeg", 'i', "" },
{0X64B6F978, &WrapI_UIUI<sceJpegDecodeMJpegSuccessively>, "sceJpegDecodeMJpegSuccessively", 'i', "xixi" },
{0X67F0ED84, &WrapI_UUIII<sceJpegCsc>, "sceJpegCsc", 'i', "xxiii"},
{0X64B6F978, &WrapI_UIUI<sceJpegDecodeMJpegSuccessively>, "sceJpegDecodeMJpegSuccessively", 'x', "xixi" },
{0X67F0ED84, &WrapI_UUIII<sceJpegCsc>, "sceJpegCsc", 'i', "xxxix"},
{0X7D2F3D7F, &WrapI_V<sceJpegFinishMJpeg>, "sceJpegFinishMJpeg", 'i', "" },
{0X8F2BB012, &WrapI_UIUI<sceJpegGetOutputInfo>, "sceJpegGetOutputInfo", 'i', "xixi" },
{0X91EED83C, &WrapI_UIUII<sceJpegDecodeMJpegYCbCr>, "sceJpegDecodeMJpegYCbCr", 'i', "xixii"},
{0X8F2BB012, &WrapI_UIUI<sceJpegGetOutputInfo>, "sceJpegGetOutputInfo", 'x', "xipi" },
{0X91EED83C, &WrapI_UIUII<sceJpegDecodeMJpegYCbCr>, "sceJpegDecodeMJpegYCbCr", 'x', "xixii"},
{0X9B36444C, &WrapI_V<sceJpeg_9B36444C>, "sceJpeg_9B36444C", 'i', "" },
{0X9D47469C, &WrapI_II<sceJpegCreateMJpeg>, "sceJpegCreateMJpeg", 'i', "ii" },
{0XAC9E70E6, &WrapI_V<sceJpegInitMJpeg>, "sceJpegInitMJpeg", 'i', "" },

View File

@ -19,6 +19,8 @@
class PointerWrap;
void JpegNotifyLoadStatus(int state);
void Register_sceJpeg();
void __JpegInit();
void __JpegDoState(PointerWrap &p);

View File

@ -32,7 +32,6 @@ enum {
ERROR_MPEG_NOT_YET_INIT = 0x80618009,
ERROR_MPEG_AVC_INVALID_VALUE = 0x806201fe,
ERROR_MPEG_AVC_DECODE_FATAL = 0x80628002,
ERROR_JPEG_INVALID_VALUE = 0x80650051,
};
// MPEG statics.

View File

@ -34,6 +34,7 @@
#include "Core/Reporting.h"
#include "Core/System.h"
#include "Core/HLE/sceJpeg.h"
#include "Core/HLE/sceKernel.h"
#include "Core/HLE/sceKernelInterrupt.h"
#include "Core/HLE/sceKernelMemory.h"
@ -79,16 +80,21 @@ static const int mpegBaseModuleDeps[] = {0x0300, 0};
static const int mp4ModuleDeps[] = {0x0300, 0};
struct ModuleLoadInfo {
ModuleLoadInfo(int m, u32 s) : mod(m), size(s), dependencies(noDeps) {
ModuleLoadInfo(int m, u32 s, void(*n)(int) = nullptr) : mod(m), size(s), dependencies(noDeps), notify(n) {
}
ModuleLoadInfo(int m, u32 s, const int *d) : mod(m), size(s), dependencies(d) {
ModuleLoadInfo(int m, u32 s, const int *d, void(*n)(int) = nullptr) : mod(m), size(s), dependencies(d), notify(n) {
}
const int mod;
const u32 size;
const int *const dependencies;
void (*notify)(int state);
};
// Load-status callback that forwards the state value unchanged to the JPEG
// HLE module via JpegNotifyLoadStatus().
static void NotifyLoadStatusAvcodec(int state) {
JpegNotifyLoadStatus(state);
}
static const ModuleLoadInfo moduleLoadInfo[] = {
ModuleLoadInfo(0x0100, 0x00014000),
ModuleLoadInfo(0x0101, 0x00020000),
@ -104,7 +110,7 @@ static const ModuleLoadInfo moduleLoadInfo[] = {
ModuleLoadInfo(0x0202, 0x00000000),
ModuleLoadInfo(0x0203, 0x00000000),
ModuleLoadInfo(0x02ff, 0x00000000),
ModuleLoadInfo(0x0300, 0x00000000),
ModuleLoadInfo(0x0300, 0x00000000, &NotifyLoadStatusAvcodec),
ModuleLoadInfo(0x0301, 0x00000000),
ModuleLoadInfo(0x0302, 0x00008000, atrac3PlusModuleDeps),
ModuleLoadInfo(0x0303, 0x0000c000, mpegBaseModuleDeps),
@ -470,12 +476,16 @@ static u32 sceUtilityLoadAvModule(u32 module)
}
INFO_LOG(SCEUTILITY, "0=sceUtilityLoadAvModule(%i)", module);
if (module == 0)
JpegNotifyLoadStatus(1);
return hleDelayResult(0, "utility av module loaded", 25000);
}
// Handles an AV module unload request.  Always reports success (0) through
// hleDelayResult with a delay value of 800; for module 0 the JPEG HLE state
// is additionally told about the unload (-1).
static u32 sceUtilityUnloadAvModule(u32 module)
{
	INFO_LOG(SCEUTILITY,"0=sceUtilityUnloadAvModule(%i)", module);

	const bool notifyJpeg = (module == 0);
	if (notifyJpeg) {
		JpegNotifyLoadStatus(-1);
	}

	return hleDelayResult(0, "utility av module unloaded", 800);
}
@ -516,6 +526,9 @@ static u32 sceUtilityLoadModule(u32 module) {
currentlyLoadedModules[module] = 0;
}
if (info->notify)
info->notify(1);
// TODO: Each module has its own timing, technically, but this is a low-end.
if (module == 0x3FF)
return hleDelayResult(hleLogSuccessInfoI(SCEUTILITY, 0), "utility module loaded", 130);
@ -537,6 +550,9 @@ static u32 sceUtilityUnloadModule(u32 module) {
}
currentlyLoadedModules.erase(module);
if (info->notify)
info->notify(-1);
// TODO: Each module has its own timing, technically, but this is a low-end.
if (module == 0x3FF)
return hleDelayResult(hleLogSuccessInfoI(SCEUTILITY, 0), "utility module unloaded", 110);

View File

@ -289,6 +289,7 @@ static const HardHashTableEntry hardcodedHashes[] = {
{ 0x70a6152b265228e8, 296, "unendingbloodycall_download_frame", }, // unENDing Bloody Call
{ 0x7245b74db370ae72, 64, "vmmul_q_transp3", },
{ 0x7259d52b21814a5a, 40, "vtfm_t_transp", },
{ 0x730f59cc6c0f5732, 452, "godseaterburst_depthmask_5551", }, // Gods Eater Burst (US)
{ 0x7354fd206796d817, 864, "flowers_download_frame", }, // Flowers
{ 0x736b34ebc702d873, 104, "vmmul_q_transp", },
{ 0x73a614c08f777d52, 792, "danganronpa2_2_download_frame", }, // Danganronpa 2

View File

@ -174,7 +174,10 @@ void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config) {
case GE_FORMAT_CLUT8:
if (shift == 0 && mask == 0xFF) {
// Easy peasy.
sprintf(lookupMethod, "index.r");
if (writer.Lang().shaderLanguage == HLSL_D3D9)
sprintf(lookupMethod, "index.a");
else
sprintf(lookupMethod, "index.r");
formatOK = true;
} else {
// Deal with this if we find it.
@ -302,6 +305,10 @@ void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config) {
// Offset by half a texel (plus clutBase) to turn NEAREST filtering into FLOOR.
// Technically, the clutBase should be |'d, not added, but that's hard with floats.
float texel_offset = ((float)config.startPos + 0.5f) / texturePixels;
if (writer.Lang().shaderLanguage == HLSL_D3D9) {
// Seems to need a half-pixel offset fix? Might mean it was rendered wrong...
texel_offset += 0.5f / texturePixels;
}
char offset[128] = "";
sprintf(offset, " + %f", texel_offset);

View File

@ -567,13 +567,22 @@ void FramebufferManagerCommon::SetDepthFrameBuffer(bool isClearingDepth) {
currentRenderVfb_->usageFlags |= FB_USAGE_RENDER_DEPTH;
uint32_t boundDepthBuffer = gstate.getDepthBufRawAddress() | 0x04000000;
if (currentRenderVfb_->z_address != boundDepthBuffer) {
uint32_t boundDepthStride = gstate.DepthBufStride();
if (currentRenderVfb_->z_address != boundDepthBuffer || currentRenderVfb_->z_stride != boundDepthStride) {
if (currentRenderVfb_->fb_address == boundDepthBuffer) {
// Disallow setting depth buffer to the same address as the color buffer, usually means it's not used.
WARN_LOG_N_TIMES(z_reassign, 5, G3D, "Ignoring color matching depth buffer at %08x", boundDepthBuffer);
boundDepthBuffer = 0;
boundDepthStride = 0;
}
WARN_LOG_N_TIMES(z_reassign, 5, G3D, "Framebuffer at %08x/%d has switched associated depth buffer from %08x to %08x, updating.",
currentRenderVfb_->fb_address, currentRenderVfb_->fb_stride, currentRenderVfb_->z_address, boundDepthBuffer);
// Technically, here we should copy away the depth buffer to another framebuffer that uses that z_address, or maybe
// even write it back to RAM. However, this is rare. Silent Hill is one example, see #16126.
currentRenderVfb_->z_address = boundDepthBuffer;
// Update the stride in case it changed.
currentRenderVfb_->z_stride = boundDepthStride;
if (currentRenderVfb_->fbo) {
char tag[128];
@ -1657,13 +1666,14 @@ bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size,
VirtualFramebuffer *srcBuffer = nullptr;
bool ignoreDstBuffer = flags & GPUCopyFlag::FORCE_DST_MEM;
bool ignoreSrcBuffer = flags & (GPUCopyFlag::FORCE_SRC_MEM | GPUCopyFlag::MEMSET);
RasterChannel channel = flags & GPUCopyFlag::DEPTH_REQUESTED ? RASTER_DEPTH : RASTER_COLOR;
u32 dstY = (u32)-1;
u32 dstH = 0;
u32 srcY = (u32)-1;
u32 srcH = 0;
for (auto vfb : vfbs_) {
if (vfb->fb_stride == 0) {
if (vfb->fb_stride == 0 || channel != RASTER_COLOR) {
continue;
}
@ -1715,14 +1725,36 @@ bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size,
}
}
if (channel == RASTER_DEPTH) {
srcBuffer = nullptr;
dstBuffer = nullptr;
// Let's assume exact matches only for simplicity.
for (auto vfb : vfbs_) {
if (!ignoreDstBuffer && dst == vfb->z_address && size == vfb->z_stride * 2 * vfb->height) {
if (!dstBuffer || dstBuffer->depthBindSeq < vfb->depthBindSeq) {
dstBuffer = vfb;
dstY = 0;
dstH = vfb->height;
}
}
if (!ignoreSrcBuffer && src == vfb->z_address && size == vfb->z_stride * 2 * vfb->height) {
if (!srcBuffer || srcBuffer->depthBindSeq < vfb->depthBindSeq) {
srcBuffer = vfb;
srcY = 0;
srcH = vfb->height;
}
}
}
}
if (!useBufferedRendering_) {
// If we're copying into a recently used display buf, it's probably destined for the screen.
if (srcBuffer || (dstBuffer != displayFramebuf_ && dstBuffer != prevDisplayFramebuf_)) {
if (channel == RASTER_DEPTH || srcBuffer || (dstBuffer != displayFramebuf_ && dstBuffer != prevDisplayFramebuf_)) {
return false;
}
}
if (!dstBuffer && srcBuffer) {
if (!dstBuffer && srcBuffer && channel != RASTER_DEPTH) {
// Note - if we're here, we're in a memcpy, not a block transfer. Not allowing IntraVRAMBlockTransferAllowCreateFB.
// Technically, that makes BlockTransferAllowCreateFB a bit of a misnomer.
if (PSP_CoreParameter().compat.flags().BlockTransferAllowCreateFB) {
@ -1740,7 +1772,7 @@ bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size,
} else {
WARN_LOG_ONCE(dstnotsrccpy, G3D, "Inter-buffer memcpy %08x -> %08x (size: %x)", src, dst, size);
// Just do the blit!
BlitFramebuffer(dstBuffer, 0, dstY, srcBuffer, 0, srcY, srcBuffer->width, srcH, 0, RASTER_COLOR, "Blit_InterBufferMemcpy");
BlitFramebuffer(dstBuffer, 0, dstY, srcBuffer, 0, srcY, srcBuffer->width, srcH, 0, channel, "Blit_InterBufferMemcpy");
SetColorUpdated(dstBuffer, skipDrawReason);
RebindFramebuffer("RebindFramebuffer - Inter-buffer memcpy");
}
@ -1752,7 +1784,9 @@ bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size,
WARN_LOG_ONCE(btucpy, G3D, "Memcpy fbo upload %08x -> %08x (size: %x)", src, dst, size);
FlushBeforeCopy();
const u8 *srcBase = Memory::GetPointerUnchecked(src);
DrawPixels(dstBuffer, 0, dstY, srcBase, dstBuffer->fb_format, dstBuffer->fb_stride, dstBuffer->width, dstH, RASTER_COLOR, "MemcpyFboUpload_DrawPixels");
GEBufferFormat srcFormat = channel == RASTER_DEPTH ? GE_FORMAT_DEPTH16 : dstBuffer->fb_format;
int srcStride = channel == RASTER_DEPTH ? dstBuffer->z_stride : dstBuffer->fb_stride;
DrawPixels(dstBuffer, 0, dstY, srcBase, srcFormat, srcStride, dstBuffer->width, dstH, channel, "MemcpyFboUpload_DrawPixels");
SetColorUpdated(dstBuffer, skipDrawReason);
RebindFramebuffer("RebindFramebuffer - Memcpy fbo upload");
// This is a memcpy, let's still copy just in case.
@ -1762,8 +1796,8 @@ bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size,
FlushBeforeCopy();
if (srcH == 0 || srcY + srcH > srcBuffer->bufferHeight) {
WARN_LOG_ONCE(btdcpyheight, G3D, "Memcpy fbo download %08x -> %08x skipped, %d+%d is taller than %d", src, dst, srcY, srcH, srcBuffer->bufferHeight);
} else if (g_Config.bBlockTransferGPU && !srcBuffer->memoryUpdated) {
ReadFramebufferToMemory(srcBuffer, 0, srcY, srcBuffer->width, srcH, RASTER_COLOR);
} else if (g_Config.bBlockTransferGPU && (!srcBuffer->memoryUpdated || channel == RASTER_DEPTH)) {
ReadFramebufferToMemory(srcBuffer, 0, srcY, srcBuffer->width, srcH, channel);
srcBuffer->usageFlags = (srcBuffer->usageFlags | FB_USAGE_DOWNLOAD) & ~FB_USAGE_DOWNLOAD_CLEAR;
}
return false;
@ -2580,7 +2614,7 @@ void FramebufferManagerCommon::PackFramebufferSync(VirtualFramebuffer *vfb, int
const int dstByteOffset = (y * stride + x) * dstBpp;
// Leave the gap between the end of the last line and the full stride.
// This is only used for the NotifyMemInfo range.
const int dstSize = (h * stride + w - 1) * dstBpp;
const int dstSize = ((h - 1) * stride + w) * dstBpp;
if (!Memory::IsValidRange(fb_address + dstByteOffset, dstSize)) {
ERROR_LOG_REPORT(G3D, "PackFramebufferSync would write outside of memory, ignoring");
@ -2594,7 +2628,11 @@ void FramebufferManagerCommon::PackFramebufferSync(VirtualFramebuffer *vfb, int
DEBUG_LOG(G3D, "Reading framebuffer to mem, fb_address = %08x, ptr=%p", fb_address, destPtr);
if (destPtr) {
draw_->CopyFramebufferToMemorySync(vfb->fbo, channel == RASTER_COLOR ? Draw::FB_COLOR_BIT : Draw::FB_DEPTH_BIT, x, y, w, h, destFormat, destPtr, vfb->fb_stride, "PackFramebufferSync");
if (channel == RASTER_DEPTH)
PackDepthbuffer(vfb, x, y, w, h);
else
draw_->CopyFramebufferToMemorySync(vfb->fbo, channel == RASTER_COLOR ? Draw::FB_COLOR_BIT : Draw::FB_DEPTH_BIT, x, y, w, h, destFormat, destPtr, vfb->fb_stride, "PackFramebufferSync");
char tag[128];
size_t len = snprintf(tag, sizeof(tag), "FramebufferPack/%08x_%08x_%dx%d_%s", vfb->fb_address, vfb->z_address, w, h, GeBufferFormatToString(vfb->fb_format));
NotifyMemInfo(MemBlockFlags::WRITE, fb_address + dstByteOffset, dstSize, tag, len);
@ -2605,6 +2643,17 @@ void FramebufferManagerCommon::PackFramebufferSync(VirtualFramebuffer *vfb, int
gpuStats.numReadbacks++;
}
// Reads back the w x h region at (x, y) of vfb's depth channel and writes it
// into emulated memory at vfb->z_address, using the format that
// GE_FORMAT_DEPTH16 maps to (2 bytes per pixel in the offset math below).
//
// Asserts that vfb is non-null and has a non-zero z_address and z_stride.
// The destination pointer is obtained unchecked; this function does not
// validate the target range itself.  A failed readback is only logged.
void FramebufferManagerCommon::PackDepthbuffer(VirtualFramebuffer *vfb, int x, int y, int w, int h) {
	_assert_msg_(vfb && vfb->z_address != 0 && vfb->z_stride != 0, "Depth buffer invalid");

	Draw::DataFormat destFormat = GEFormatToThin3D(GE_FORMAT_DEPTH16);
	const int dstByteOffset = (y * vfb->z_stride + x) * 2;
	u8 *destPtr = Memory::GetPointerWriteUnchecked(vfb->z_address + dstByteOffset);

	// The destination is laid out with the depth stride (see dstByteOffset),
	// so the readback must use z_stride as well, not the color fb_stride;
	// otherwise rows land at the wrong pitch whenever the two differ.
	if (!draw_->CopyFramebufferToMemorySync(vfb->fbo, Draw::FB_DEPTH_BIT, x, y, w, h, destFormat, destPtr, vfb->z_stride, "PackDepthbuffer")) {
		WARN_LOG(G3D, "PackDepthbuffer failed");
	}
}
void FramebufferManagerCommon::ReadFramebufferToMemory(VirtualFramebuffer *vfb, int x, int y, int w, int h, RasterChannel channel) {
// Clamp to bufferWidth. Sometimes block transfers can cause this to hit.
if (x + w >= vfb->bufferWidth) {
@ -2623,7 +2672,8 @@ void FramebufferManagerCommon::ReadFramebufferToMemory(VirtualFramebuffer *vfb,
vfb->usageFlags |= FB_USAGE_DOWNLOAD;
} else if (x == 0 && y == 0 && w == vfb->width && h == vfb->height) {
// Mark it as fully downloaded until next render to it.
vfb->memoryUpdated = true;
if (channel == RASTER_COLOR)
vfb->memoryUpdated = true;
vfb->usageFlags |= FB_USAGE_DOWNLOAD;
} else {
// Let's try to set the flag eventually, if the game copies a lot.

View File

@ -442,6 +442,8 @@ public:
protected:
virtual void PackFramebufferSync(VirtualFramebuffer *vfb, int x, int y, int w, int h, RasterChannel channel);
// Used for when a shader is required, such as GLES.
virtual void PackDepthbuffer(VirtualFramebuffer *vfb, int x, int y, int w, int h);
void SetViewport2D(int x, int y, int w, int h);
Draw::Texture *MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height);
void DrawActiveTexture(float x, float y, float w, float h, float destW, float destH, float u0, float v0, float u1, float v1, int uvRotation, int flags);

View File

@ -49,10 +49,10 @@ public:
protected:
void DecimateFBOs() override;
void PackDepthbuffer(VirtualFramebuffer *vfb, int x, int y, int w, int h) override;
private:
void PackFramebufferSync(VirtualFramebuffer *vfb, int x, int y, int w, int h, RasterChannel channel) override;
void PackDepthbuffer(VirtualFramebuffer *vfb, int x, int y, int w, int h);
bool GetRenderTargetFramebuffer(LPDIRECT3DSURFACE9 renderTarget, LPDIRECT3DSURFACE9 offscreen, int w, int h, GPUDebugBuffer &buffer);
LPDIRECT3DDEVICE9 device_;

View File

@ -47,6 +47,8 @@ Draw::DataFormat FromD3D9Format(u32 fmt) {
return Draw::DataFormat::A1R5G5B5_UNORM_PACK16;
case D3DFMT_R5G6B5:
return Draw::DataFormat::R5G6B5_UNORM_PACK16;
case D3DFMT_A8:
return Draw::DataFormat::R8_UNORM;
case D3DFMT_A8R8G8B8:
default:
return Draw::DataFormat::R8G8B8A8_UNORM;
@ -251,6 +253,8 @@ void TextureCacheDX9::BuildTexture(TexCacheEntry *const entry) {
dstFmt = ToD3D9Format(plan.replaced->Format(plan.baseLevelSrc));
} else if (plan.scaleFactor > 1 || plan.saveTexture) {
dstFmt = D3DFMT_A8R8G8B8;
} else if (plan.decodeToClut8) {
dstFmt = D3DFMT_A8;
}
int levels;

View File

@ -38,10 +38,9 @@ public:
protected:
void UpdateDownloadTempBuffer(VirtualFramebuffer *nvfb) override;
void PackDepthbuffer(VirtualFramebuffer *vfb, int x, int y, int w, int h) override;
private:
void PackDepthbuffer(VirtualFramebuffer *vfb, int x, int y, int w, int h);
u8 *convBuf_ = nullptr;
u32 convBufSize_ = 0;

View File

@ -1715,13 +1715,22 @@ void GPUCommon::Execute_VertexTypeSkinning(u32 op, u32 diff) {
void GPUCommon::CheckDepthUsage(VirtualFramebuffer *vfb) {
if (!gstate_c.usingDepth) {
bool isClearingDepth = gstate.isModeClear() && gstate.isClearModeDepthMask();
bool isReadingDepth = false;
bool isClearingDepth = false;
bool isWritingDepth = false;
if (gstate.isModeClear()) {
isClearingDepth = gstate.isClearModeDepthMask();
isWritingDepth = isClearingDepth;
} else if (gstate.isDepthTestEnabled()) {
isWritingDepth = gstate.isDepthWriteEnabled();
isReadingDepth = gstate.getDepthTestFunction() > GE_COMP_ALWAYS;
}
if ((gstate.isDepthTestEnabled() || isClearingDepth)) {
if (isWritingDepth || isReadingDepth) {
gstate_c.usingDepth = true;
gstate_c.clearingDepth = isClearingDepth;
vfb->last_frame_depth_render = gpuStats.numFlips;
if (isClearingDepth || gstate.isDepthWriteEnabled()) {
if (isWritingDepth) {
vfb->last_frame_depth_updated = gpuStats.numFlips;
}
framebufferManager_->SetDepthFrameBuffer(isClearingDepth);

View File

@ -120,7 +120,8 @@ enum class GPUCopyFlag {
FORCE_DST_MEM = 2,
// Note: implies src == dst and FORCE_SRC_MEM.
MEMSET = 4,
DEBUG_NOTIFIED = 8,
DEPTH_REQUESTED = 8,
DEBUG_NOTIFIED = 16,
};
ENUM_CLASS_BITOPS(GPUCopyFlag);

View File

@ -234,7 +234,7 @@ u32 GPU_Vulkan::CheckGPUFeatures() const {
// Fall back to geometry shader culling if we can't do vertex range culling.
if (enabledFeatures.geometryShader) {
const bool useGeometry = g_Config.bUseGeometryShader && !draw_->GetBugs().Has(Draw::Bugs::GEOMETRY_SHADERS_SLOW);
const bool useGeometry = g_Config.bUseGeometryShader && !draw_->GetBugs().Has(Draw::Bugs::GEOMETRY_SHADERS_SLOW_OR_BROKEN);
const bool vertexSupported = draw_->GetDeviceCaps().clipDistanceSupported && draw_->GetDeviceCaps().cullDistanceSupported;
if (useGeometry && (!vertexSupported || (features & GPU_SUPPORTS_VS_RANGE_CULLING) == 0)) {
// Switch to culling via the geometry shader if not fully supported in vertex.

View File

@ -474,7 +474,7 @@ void GameSettingsScreen::CreateViews() {
}
if (GetGPUBackend() == GPUBackend::VULKAN) {
const bool usable = !draw->GetBugs().Has(Draw::Bugs::GEOMETRY_SHADERS_SLOW);
const bool usable = !draw->GetBugs().Has(Draw::Bugs::GEOMETRY_SHADERS_SLOW_OR_BROKEN);
const bool vertexSupported = draw->GetDeviceCaps().clipDistanceSupported && draw->GetDeviceCaps().cullDistanceSupported;
if (usable && !vertexSupported) {
CheckBox *geometryCulling = graphicsSettings->Add(new CheckBox(&g_Config.bUseGeometryShader, gr->T("Geometry shader culling")));