Merge branch 'master' into feature_openxr_vulkan

This commit is contained in:
Lubos 2022-09-15 00:22:46 +02:00
commit f98381fb57
31 changed files with 201 additions and 171 deletions

View File

@ -47,12 +47,12 @@ struct TinySet {
size_t otherSize = other.size();
if (size() + otherSize <= MaxFastSize) {
// Fast case
for (int i = 0; i < otherSize; i++) {
for (size_t i = 0; i < otherSize; i++) {
fastLookup_[fastCount + i] = other.fastLookup_[i];
}
fastCount += other.fastCount;
} else {
for (int i = 0; i < otherSize; i++) {
for (size_t i = 0; i < otherSize; i++) {
push_back(other[i]);
}
}

View File

@ -1219,7 +1219,7 @@ bool OpenGLPipeline::LinkShaders() {
}
std::vector<GLRProgram::Initializer> initialize;
for (int i = 0; i < MAX_TEXTURE_SLOTS; ++i) {
if (i < samplers_.size()) {
if (i < (int)samplers_.size()) {
initialize.push_back({ &samplerLocs_[i], 0, i });
} else {
samplerLocs_[i] = -1;

View File

@ -1086,7 +1086,7 @@ Pipeline *VKContext::CreateGraphicsPipeline(const PipelineDesc &desc, const char
_dbg_assert_((int)input->attributes.size() == (int)input->visc.vertexAttributeDescriptionCount);
gDesc.ibd = input->bindings[0];
for (int i = 0; i < input->attributes.size(); i++) {
for (size_t i = 0; i < input->attributes.size(); i++) {
gDesc.attrs[i] = input->attributes[i];
}
gDesc.vis.vertexAttributeDescriptionCount = input->visc.vertexAttributeDescriptionCount;

View File

@ -1149,7 +1149,9 @@ void PSPSaveDialog::ExecuteNotVisibleIOAction() {
case SCE_UTILITY_SAVEDATA_TYPE_READDATA:
case SCE_UTILITY_SAVEDATA_TYPE_READDATASECURE:
result = param.Load(param.GetPspParam(), GetSelectedSaveDirName(), currentSelectedSave, param.GetPspParam()->mode == SCE_UTILITY_SAVEDATA_TYPE_READDATASECURE);
if(result == SCE_UTILITY_SAVEDATA_ERROR_LOAD_NO_DATA)
if (result == SCE_UTILITY_SAVEDATA_ERROR_LOAD_DATA_BROKEN)
result = SCE_UTILITY_SAVEDATA_ERROR_RW_DATA_BROKEN;
if (result == SCE_UTILITY_SAVEDATA_ERROR_LOAD_NO_DATA)
result = SCE_UTILITY_SAVEDATA_ERROR_RW_NO_DATA;
break;
case SCE_UTILITY_SAVEDATA_TYPE_ERASE:

View File

@ -589,15 +589,22 @@ int SavedataParam::Load(SceUtilitySavedataParam *param, const std::string &saveD
return isRWMode ? SCE_UTILITY_SAVEDATA_ERROR_RW_NO_DATA : SCE_UTILITY_SAVEDATA_ERROR_LOAD_NO_DATA;
}
if (fileName != "" && !pspFileSystem.GetFileInfo(filePath).exists) {
return isRWMode ? SCE_UTILITY_SAVEDATA_ERROR_RW_FILE_NOT_FOUND : SCE_UTILITY_SAVEDATA_ERROR_LOAD_FILE_NOT_FOUND;
}
// If it wasn't zero, force to zero before loading and especially in case of error.
// This isn't reset if the path doesn't even exist.
param->dataSize = 0;
int result = LoadSaveData(param, saveDirName, dirPath, secureMode);
if (result != 0)
return result;
// Load sfo
if (!LoadSFO(param, dirPath)) {
return isRWMode ? SCE_UTILITY_SAVEDATA_ERROR_RW_DATA_BROKEN : SCE_UTILITY_SAVEDATA_ERROR_LOAD_DATA_BROKEN;
}
if (fileName != "" && !pspFileSystem.GetFileInfo(filePath).exists) {
return isRWMode ? SCE_UTILITY_SAVEDATA_ERROR_RW_FILE_NOT_FOUND : SCE_UTILITY_SAVEDATA_ERROR_LOAD_FILE_NOT_FOUND;
}
// Don't know what it is, but the PSP always responds with this value, and it unlocks some games.
param->bind = 1021;
@ -612,15 +619,6 @@ int SavedataParam::Load(SceUtilitySavedataParam *param, const std::string &saveD
// Load SND0.AT3
LoadFile(dirPath, SND0_FILENAME, &param->snd0FileData);
if (fileName == "") {
// Don't load savedata but return success.
return 0;
}
int result = LoadSaveData(param, saveDirName, dirPath, secureMode);
if (result != 0)
return result;
return 0;
}
@ -638,6 +636,10 @@ int SavedataParam::LoadSaveData(SceUtilitySavedataParam *param, const std::strin
std::string filename = GetFileName(param);
std::string filePath = dirPath + "/" + filename;
// Blank filename always means success, if secureVersion was correct.
if (filename == "")
return 0;
s64 readSize;
INFO_LOG(SCEUTILITY, "Loading file with size %u in %s", param->dataBufSize, filePath.c_str());
u8 *saveData = nullptr;
@ -667,14 +669,18 @@ int SavedataParam::LoadSaveData(SceUtilitySavedataParam *param, const std::strin
if (!saveDone) {
loadedSize = LoadNotCryptedSave(param, param->dataBuf, saveData, saveSize);
}
param->dataSize = (SceSize)saveSize;
delete[] saveData;
if (loadedSize != 0) {
// Ignore error codes.
if (loadedSize != 0 && (loadedSize & 0x80000000) == 0) {
std::string tag = "LoadSaveData/" + filePath;
NotifyMemInfo(MemBlockFlags::WRITE, param->dataBuf.ptr, loadedSize, tag.c_str(), tag.size());
}
if ((loadedSize & 0x80000000) != 0)
return loadedSize;
param->dataSize = (SceSize)saveSize;
return 0;
}
@ -760,8 +766,12 @@ u32 SavedataParam::LoadCryptedSave(SceUtilitySavedataParam *param, u8 *data, con
u32 sz = 0;
if (err == 0) {
if (param->dataBuf.IsValid()) {
sz = std::min((u32)saveSize, (u32)param->dataBufSize);
memcpy(data, data_base, sz);
if ((u32)saveSize > param->dataBufSize || !Memory::IsValidRange(param->dataBuf.ptr, saveSize)) {
sz = SCE_UTILITY_SAVEDATA_ERROR_LOAD_DATA_BROKEN;
} else {
sz = (u32)saveSize;
memcpy(data, data_base, sz);
}
}
saveDone = true;
}
@ -773,9 +783,11 @@ u32 SavedataParam::LoadCryptedSave(SceUtilitySavedataParam *param, u8 *data, con
u32 SavedataParam::LoadNotCryptedSave(SceUtilitySavedataParam *param, u8 *data, u8 *saveData, int &saveSize) {
if (param->dataBuf.IsValid()) {
u32 sz = std::min((u32)saveSize, (u32)param->dataBufSize);
memcpy(data, saveData, sz);
return sz;
if ((u32)saveSize > param->dataBufSize || !Memory::IsValidRange(param->dataBuf.ptr, saveSize)) {
return SCE_UTILITY_SAVEDATA_ERROR_LOAD_DATA_BROKEN;
}
memcpy(data, saveData, saveSize);
return saveSize;
}
return 0;
}

View File

@ -132,10 +132,7 @@ void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config) {
break;
}
float texturePixels = 256.0f;
if (config.clutFormat != GE_CMODE_32BIT_ABGR8888) {
texturePixels = 512.0f;
}
float texturePixels = 512.0f;
if (shift) {
writer.F(" index = (int(uint(index) >> uint(%d)) & 0x%02x)", shift, mask);
@ -278,11 +275,9 @@ void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config) {
break;
}
float texturePixels = 256.f;
if (config.clutFormat != GE_CMODE_32BIT_ABGR8888) {
texturePixels = 512.f;
index_multiplier *= 0.5f;
}
// We always use 512-sized textures now.
float texturePixels = 512.f;
index_multiplier *= 0.5f;
// Adjust index_multiplier, similar to the use of 15.99 instead of 16 in the ES 3 path.
// index_multiplier -= 0.01f / texturePixels;
@ -326,11 +321,7 @@ void GenerateDepalSmoothed(ShaderWriter &writer, const DepalConfig &config) {
}
writer.C(" float index = ").SampleTexture2D("tex", "v_texcoord").F(".%s * %0.1f;\n", sourceChannel, indexMultiplier);
float texturePixels = 256.f;
if (config.clutFormat != GE_CMODE_32BIT_ABGR8888) {
texturePixels = 512.f;
}
float texturePixels = 512.f;
writer.F(" float coord = (index + 0.5) * %f;\n", 1.0 / texturePixels);
writer.C(" vec4 outColor = ").SampleTexture2D("pal", "vec2(coord, 0.0)").C(";\n");
}

View File

@ -640,7 +640,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
p.C(" if (depalShift == 5u) { index0 = t.g; }\n");
p.C(" else if (depalShift == 10u) { index0 = t.b; }\n");
p.C(" }\n");
p.F(" t = ").SampleTexture2D("pal", "vec2(index0 * factor, 0.0)").C(";\n");
p.F(" t = ").SampleTexture2D("pal", "vec2(index0 * factor * 0.5, 0.0)").C(";\n"); // 0.5 for 512-entry CLUT.
break;
case ShaderDepalMode::NORMAL:
if (doTextureProjection) {

View File

@ -666,6 +666,10 @@ static const char *reinterpretStrings[4][4] = {
// Call this after the target has been bound for rendering. For color, raster is probably always going to win over blits/copies.
void FramebufferManagerCommon::CopyToColorFromOverlappingFramebuffers(VirtualFramebuffer *dst) {
if (!useBufferedRendering_) {
return;
}
std::vector<CopySource> sources;
for (auto src : vfbs_) {
// Discard old and equal potential inputs.
@ -773,30 +777,17 @@ void FramebufferManagerCommon::CopyToColorFromOverlappingFramebuffers(VirtualFra
WARN_LOG_ONCE(bta, G3D, "WARNING: Reinterpret encountered with BlueToAlpha on");
}
if (IsBufferFormat16Bit(src->fb_format) && !IsBufferFormat16Bit(dst->fb_format)) {
// We halve the X coordinates in the destination framebuffer.
// The shader will collect two pixels worth of input data and merge into one.
dstX1 *= 0.5f;
dstX2 *= 0.5f;
} else if (!IsBufferFormat16Bit(src->fb_format) && IsBufferFormat16Bit(dst->fb_format)) {
// We double the X coordinates in the destination framebuffer.
// The shader will sample and depending on the X coordinate & 1, use the upper or lower bits.
dstX1 *= 2.0f;
dstX2 *= 2.0f;
}
// Reinterpret!
WARN_LOG_N_TIMES(reint, 5, G3D, "Reinterpret detected from %08x_%s to %08x_%s",
src->fb_address, GeBufferFormatToString(src->fb_format),
dst->fb_address, GeBufferFormatToString(dst->fb_format));
pipeline = reinterpretFromTo_[(int)src->fb_format][(int)dst->fb_format];
float scaleFactorX = 1.0f;
pipeline = GetReinterpretPipeline(src->fb_format, dst->fb_format, &scaleFactorX);
dstX1 *= scaleFactorX;
dstX2 *= scaleFactorX;
pass_name = reinterpretStrings[(int)src->fb_format][(int)dst->fb_format];
if (!pipeline) {
pipeline = draw2D_.Create2DPipeline([=](ShaderWriter &shaderWriter) -> Draw2DPipelineInfo {
return GenerateReinterpretFragmentShader(shaderWriter, src->fb_format, dst->fb_format);
});
reinterpretFromTo_[(int)src->fb_format][(int)dst->fb_format] = pipeline;
}
gpuStats.numReinterpretCopies++;
}
@ -819,6 +810,27 @@ void FramebufferManagerCommon::CopyToColorFromOverlappingFramebuffers(VirtualFra
textureCache_->ForgetLastTexture();
}
// Returns the pipeline used to reinterpret color data from format `from` to format `to`.
// Pipelines are created on first use and cached in reinterpretFromTo_.
//
// *scaleFactorX receives the factor the caller must apply to the destination X coordinates:
//   0.5 when going from a 16-bit format to a non-16-bit format,
//   2.0 when going from a non-16-bit format to a 16-bit format,
//   1.0 otherwise.
// It is always written, so callers don't need to pre-initialize it.
Draw2DPipeline *FramebufferManagerCommon::GetReinterpretPipeline(GEBufferFormat from, GEBufferFormat to, float *scaleFactorX) {
	if (IsBufferFormat16Bit(from) && !IsBufferFormat16Bit(to)) {
		// We halve the X coordinates in the destination framebuffer.
		// The shader will collect two pixels worth of input data and merge into one.
		*scaleFactorX = 0.5f;
	} else if (!IsBufferFormat16Bit(from) && IsBufferFormat16Bit(to)) {
		// We double the X coordinates in the destination framebuffer.
		// The shader will sample and depending on the X coordinate & 1, use the upper or lower bits.
		*scaleFactorX = 2.0f;
	} else {
		// Same pixel size on both sides, so no horizontal rescale is needed.
		// Written explicitly so the out-parameter is never left at whatever the caller had.
		*scaleFactorX = 1.0f;
	}

	// Look up the cached pipeline for this format pair, creating it on first use.
	Draw2DPipeline *pipeline = reinterpretFromTo_[(int)from][(int)to];
	if (!pipeline) {
		pipeline = draw2D_.Create2DPipeline([=](ShaderWriter &shaderWriter) -> Draw2DPipelineInfo {
			return GenerateReinterpretFragmentShader(shaderWriter, from, to);
		});
		reinterpretFromTo_[(int)from][(int)to] = pipeline;
	}
	return pipeline;
}
void FramebufferManagerCommon::DestroyFramebuf(VirtualFramebuffer *v) {
// Notify the texture cache of both the color and depth buffers.
textureCache_->NotifyFramebuffer(v, NOTIFY_FB_DESTROYED);
@ -1069,7 +1081,7 @@ bool FramebufferManagerCommon::BindFramebufferAsColorTexture(int stage, VirtualF
// currentRenderVfb_ will always be set when this is called, except from the GE debugger.
// Let's just not bother with the copy in that case.
bool skipCopy = !(flags & BINDFBCOLOR_MAY_COPY) || GPUStepping::IsStepping();
bool skipCopy = !(flags & BINDFBCOLOR_MAY_COPY);
// Currently rendering to this framebuffer. Need to make a copy.
if (!skipCopy && framebuffer == currentRenderVfb_) {

View File

@ -421,14 +421,15 @@ public:
// Returns the resolved framebuffer.
VirtualFramebuffer *ResolveFramebufferColorToFormat(VirtualFramebuffer *vfb, GEBufferFormat newFormat);
Draw2DPipeline *Get2DPipeline(Draw2DShader shader);
Draw2DPipeline *GetReinterpretPipeline(GEBufferFormat from, GEBufferFormat to, float *scaleFactorX);
protected:
virtual void PackFramebufferSync(VirtualFramebuffer *vfb, int x, int y, int w, int h, RasterChannel channel);
void SetViewport2D(int x, int y, int w, int h);
Draw::Texture *MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height);
void DrawActiveTexture(float x, float y, float w, float h, float destW, float destH, float u0, float v0, float u1, float v1, int uvRotation, int flags);
Draw2DPipeline *Get2DPipeline(Draw2DShader shader);
void CopyToColorFromOverlappingFramebuffers(VirtualFramebuffer *dest);
void CopyToDepthFromOverlappingFramebuffers(VirtualFramebuffer *dest);

View File

@ -856,7 +856,7 @@ static inline bool blendColorSimilar(uint32_t a, uint32_t b, int margin = 25) {
// Try to simulate some common logic ops by using blend, if needed.
// The shader might also need modification, the below function SimulateLogicOpShaderTypeIfNeeded
// takes care of that.
static void SimulateLogicOpIfNeeded(BlendFactor &srcBlend, BlendFactor &dstBlend, BlendEq &blendEq) {
static bool SimulateLogicOpIfNeeded(BlendFactor &srcBlend, BlendFactor &dstBlend, BlendEq &blendEq) {
// Note: our shader solution applies logic ops BEFORE blending, not correctly after.
// This is however fine for the most common ones, like CLEAR/NOOP/SET, etc.
if (!gstate_c.Supports(GPU_SUPPORTS_LOGIC_OP)) {
@ -866,7 +866,7 @@ static void SimulateLogicOpIfNeeded(BlendFactor &srcBlend, BlendFactor &dstBlend
srcBlend = BlendFactor::ZERO;
dstBlend = BlendFactor::ZERO;
blendEq = BlendEq::ADD;
break;
return true;
case GE_LOGIC_AND:
case GE_LOGIC_AND_REVERSE:
WARN_LOG_REPORT_ONCE(d3dLogicOpAnd, G3D, "Unsupported AND logic op: %x", gstate.getLogicOp());
@ -889,21 +889,23 @@ static void SimulateLogicOpIfNeeded(BlendFactor &srcBlend, BlendFactor &dstBlend
dstBlend = BlendFactor::ONE;
blendEq = BlendEq::SUBTRACT;
WARN_LOG_REPORT_ONCE(d3dLogicOpInverted, G3D, "Attempted inverse for logic op: %x", gstate.getLogicOp());
break;
return true;
case GE_LOGIC_NOOP:
srcBlend = BlendFactor::ZERO;
dstBlend = BlendFactor::ONE;
blendEq = BlendEq::ADD;
break;
return true;
case GE_LOGIC_XOR:
WARN_LOG_REPORT_ONCE(d3dLogicOpOrXor, G3D, "Unsupported XOR logic op: %x", gstate.getLogicOp());
break;
case GE_LOGIC_OR:
case GE_LOGIC_OR_INVERTED:
// Inverted in shader.
srcBlend = BlendFactor::ONE;
dstBlend = BlendFactor::ONE;
blendEq = BlendEq::ADD;
WARN_LOG_REPORT_ONCE(d3dLogicOpOr, G3D, "Attempted or for logic op: %x", gstate.getLogicOp());
break;
return true;
case GE_LOGIC_OR_REVERSE:
WARN_LOG_REPORT_ONCE(d3dLogicOpOrReverse, G3D, "Unsupported OR REVERSE logic op: %x", gstate.getLogicOp());
break;
@ -912,10 +914,12 @@ static void SimulateLogicOpIfNeeded(BlendFactor &srcBlend, BlendFactor &dstBlend
dstBlend = BlendFactor::ONE;
blendEq = BlendEq::ADD;
WARN_LOG_REPORT_ONCE(d3dLogicOpSet, G3D, "Attempted set for logic op: %x", gstate.getLogicOp());
break;
return true;
}
}
}
return false;
}
// Choose the shader part of the above logic op fallback simulation.
@ -950,7 +954,6 @@ void ApplyStencilReplaceAndLogicOpIgnoreBlend(ReplaceAlphaType replaceAlphaWithS
BlendFactor srcBlend = BlendFactor::ONE;
BlendFactor dstBlend = BlendFactor::ZERO;
BlendEq blendEq = BlendEq::ADD;
SimulateLogicOpIfNeeded(srcBlend, dstBlend, blendEq);
// We're not blending, but we may still want to "blend" for stencil.
// This is only useful for INCR/DECR/INVERT. Others can write directly.
@ -1058,8 +1061,10 @@ static void ConvertBlendState(GenericBlendState &blendState, bool forceReplaceBl
ReplaceBlendType replaceBlend = ReplaceBlendWithShader(gstate_c.framebufFormat);
if (forceReplaceBlend) {
replaceBlend = REPLACE_BLEND_READ_FRAMEBUFFER;
// Enforce blend replacement if enabled. If not, shouldn't do anything of course.
replaceBlend = gstate.isAlphaBlendEnabled() ? REPLACE_BLEND_READ_FRAMEBUFFER : REPLACE_BLEND_NO;
}
blendState.replaceBlend = replaceBlend;
blendState.simulateLogicOpType = SimulateLogicOpShaderTypeIfNeeded();
@ -1250,11 +1255,6 @@ static void ConvertBlendState(GenericBlendState &blendState, bool forceReplaceBl
colorEq = eqLookupNoMinMax[blendFuncEq];
}
// Attempt to apply simulated logic ops, if any and if needed.
if (!forceReplaceBlend) {
SimulateLogicOpIfNeeded(glBlendFuncA, glBlendFuncB, colorEq);
}
// The stencil-to-alpha in fragment shader doesn't apply here (blending is enabled), and we shouldn't
// do any blending in the alpha channel as that doesn't seem to happen on PSP. So, we attempt to
// apply the stencil to the alpha, since that's what should be stored.
@ -1579,5 +1579,20 @@ void ComputedPipelineState::Convert(bool shaderBitOpsSuppported) {
if (blendState.applyFramebufferRead || logicState.applyFramebufferRead) {
maskState.ConvertToShaderBlend();
logicState.ConvertToShaderBlend();
} else {
// If it isn't a read, we may need to change blending to apply the logic op.
logicState.ApplyToBlendState(blendState);
}
}
// Applies the blend-based logic op simulation to the given blend state.
// If a logic op was simulated and blending was previously off, blending is turned on
// with the alpha channel set to ONE / ZERO / ADD.
void GenericLogicState::ApplyToBlendState(GenericBlendState &blendState) {
	// The color factors and equation are passed by reference and rewritten in place.
	const bool simulated = SimulateLogicOpIfNeeded(blendState.srcColor, blendState.dstColor, blendState.eqColor);
	if (!simulated || blendState.blendEnabled) {
		// Nothing was simulated, or blending was already on: leave the rest untouched.
		return;
	}

	// Blending wasn't turned on before, so enable it now and fill in the alpha state.
	blendState.blendEnabled = true;
	blendState.srcAlpha = BlendFactor::ONE;
	blendState.dstAlpha = BlendFactor::ZERO;
	blendState.eqAlpha = BlendEq::ADD;
}

View File

@ -226,6 +226,7 @@ struct GenericLogicState {
// Hardware and shader generation
GELogicOp logicOp;
void ApplyToBlendState(GenericBlendState &blendState);
void ConvertToShaderBlend() {
if (logicOp != GE_LOGIC_COPY) {
logicOpEnabled = false;
@ -245,7 +246,9 @@ struct ComputedPipelineState {
void Convert(bool shaderBitOpsSupported);
bool FramebufferRead() const {
return blendState.applyFramebufferRead;
// If blending is off, its applyFramebufferRead can be false even after state propagation.
// So it's not enough to check just that one.
return blendState.applyFramebufferRead || maskState.applyFramebufferRead || logicState.applyFramebufferRead;
}
};

View File

@ -1471,10 +1471,13 @@ inline u32 TfmtRawToFullAlpha(GETextureFormat fmt) {
}
}
CheckAlphaResult TextureCacheCommon::DecodeTextureLevel(u8 *out, int outPitch, GETextureFormat format, GEPaletteFormat clutformat, uint32_t texaddr, int level, int bufw, bool reverseColors, bool expandTo32bit) {
CheckAlphaResult TextureCacheCommon::DecodeTextureLevel(u8 *out, int outPitch, GETextureFormat format, GEPaletteFormat clutformat, uint32_t texaddr, int level, int bufw, TexDecodeFlags flags) {
u32 alphaSum = 0xFFFFFFFF;
u32 fullAlphaMask = 0x0;
bool expandTo32bit = (flags & TexDecodeFlags::EXPAND32) != 0;
bool reverseColors = (flags & TexDecodeFlags::REVERSE_COLORS) != 0;
bool swizzled = gstate.isTextureSwizzled();
if ((texaddr & 0x00600000) != 0 && Memory::IsVRAMAddress(texaddr)) {
// This means it's in a mirror, possibly a swizzled mirror. Let's report.
@ -2459,7 +2462,7 @@ bool TextureCacheCommon::PrepareBuildTexture(BuildTexturePlan &plan, TexCacheEnt
return true;
}
void TextureCacheCommon::LoadTextureLevel(TexCacheEntry &entry, uint8_t *data, int stride, ReplacedTexture &replaced, int srcLevel, int scaleFactor, Draw::DataFormat dstFmt, bool reverseColors) {
void TextureCacheCommon::LoadTextureLevel(TexCacheEntry &entry, uint8_t *data, int stride, ReplacedTexture &replaced, int srcLevel, int scaleFactor, Draw::DataFormat dstFmt, TexDecodeFlags texDecFlags) {
int w = gstate.getTextureWidth(srcLevel);
int h = gstate.getTextureHeight(srcLevel);
@ -2486,9 +2489,11 @@ void TextureCacheCommon::LoadTextureLevel(TexCacheEntry &entry, uint8_t *data, i
decPitch = stride;
}
bool expand32 = !gstate_c.Supports(GPU_SUPPORTS_16BIT_FORMATS) || dstFmt == Draw::DataFormat::R8G8B8A8_UNORM;
if (!gstate_c.Supports(GPU_SUPPORTS_16BIT_FORMATS) || dstFmt == Draw::DataFormat::R8G8B8A8_UNORM) {
texDecFlags |= TexDecodeFlags::EXPAND32;
}
CheckAlphaResult alphaResult = DecodeTextureLevel((u8 *)pixelData, decPitch, tfmt, clutformat, texaddr, srcLevel, bufw, reverseColors, expand32);
CheckAlphaResult alphaResult = DecodeTextureLevel((u8 *)pixelData, decPitch, tfmt, clutformat, texaddr, srcLevel, bufw, texDecFlags);
entry.SetAlphaStatus(alphaResult, srcLevel);
if (scaleFactor > 1) {

View File

@ -50,6 +50,12 @@ struct VirtualFramebuffer;
class TextureReplacer;
class ShaderManagerCommon;
// Bit flags passed to TextureCacheCommon::LoadTextureLevel / DecodeTextureLevel to select
// optional conversions during texture decoding. An empty value (TexDecodeFlags{}) requests none.
enum class TexDecodeFlags {
// Read by DecodeTextureLevel as its expandTo32bit switch. LoadTextureLevel also sets it itself
// when 16-bit formats aren't supported or the destination format is R8G8B8A8_UNORM.
EXPAND32 = 1,
// Read by DecodeTextureLevel as its reverseColors switch.
REVERSE_COLORS = 2,
};
// NOTE(review): presumably defines the bitwise operators (such as & and |=) used on these flags
// elsewhere - confirm against the macro definition.
ENUM_CLASS_BITOPS(TexDecodeFlags);
namespace Draw {
class DrawContext;
class Texture;
@ -354,13 +360,13 @@ protected:
virtual void BindAsClutTexture(Draw::Texture *tex, bool smooth) {}
CheckAlphaResult DecodeTextureLevel(u8 *out, int outPitch, GETextureFormat format, GEPaletteFormat clutformat, uint32_t texaddr, int level, int bufw, bool reverseColors, bool expandTo32Bit);
CheckAlphaResult DecodeTextureLevel(u8 *out, int outPitch, GETextureFormat format, GEPaletteFormat clutformat, uint32_t texaddr, int level, int bufw, TexDecodeFlags flags);
void UnswizzleFromMem(u32 *dest, u32 destPitch, const u8 *texptr, u32 bufw, u32 height, u32 bytesPerPixel);
CheckAlphaResult ReadIndexedTex(u8 *out, int outPitch, int level, const u8 *texptr, int bytesPerIndex, int bufw, bool reverseColors, bool expandTo32Bit);
ReplacedTexture &FindReplacement(TexCacheEntry *entry, int &w, int &h, int &d);
// Return value is mapData normally, but could be another buffer allocated with AllocateAlignedMemory.
void LoadTextureLevel(TexCacheEntry &entry, uint8_t *mapData, int mapRowPitch, ReplacedTexture &replaced, int srcLevel, int scaleFactor, Draw::DataFormat dstFmt, bool reverseColors);
void LoadTextureLevel(TexCacheEntry &entry, uint8_t *mapData, int mapRowPitch, ReplacedTexture &replaced, int srcLevel, int scaleFactor, Draw::DataFormat dstFmt, TexDecodeFlags texDecFlags);
template <typename T>
inline const T *GetCurrentClut() {

View File

@ -68,7 +68,7 @@ ClutTexture TextureShaderCache::GetClutTexture(GEPaletteFormat clutFormat, const
ClutTexture *tex = new ClutTexture();
Draw::TextureDesc desc{};
desc.width = maxClutEntries;
desc.width = 512; // We always use 512-sized textures here for simplicity, though the most common is that only up to 256 entries are used.
desc.height = 1;
desc.depth = 1;
desc.mipLevels = 1;

View File

@ -378,7 +378,7 @@ void TextureCacheD3D11::BuildTexture(TexCacheEntry *const entry) {
return;
}
LoadTextureLevel(*entry, data, stride, *plan.replaced, srcLevel, plan.scaleFactor, texFmt, false);
LoadTextureLevel(*entry, data, stride, *plan.replaced, srcLevel, plan.scaleFactor, texFmt, TexDecodeFlags{});
if (plan.depth == 1) {
context_->UpdateSubresource(texture, i, nullptr, data, stride, 0);
} else {

View File

@ -290,8 +290,6 @@ void TextureCacheDX9::BuildTexture(TexCacheEntry *const entry) {
return;
}
Draw::DataFormat texFmt = FromD3D9Format(dstFmt);
if (plan.depth == 1) {
// Regular loop.
for (int i = 0; i < levels; i++) {
@ -307,7 +305,7 @@ void TextureCacheDX9::BuildTexture(TexCacheEntry *const entry) {
}
uint8_t *data = (uint8_t *)rect.pBits;
int stride = rect.Pitch;
LoadTextureLevel(*entry, data, stride, *plan.replaced, (i == 0) ? plan.baseLevelSrc : i, plan.scaleFactor, texFmt, false);
LoadTextureLevel(*entry, data, stride, *plan.replaced, (i == 0) ? plan.baseLevelSrc : i, plan.scaleFactor, FromD3D9Format(dstFmt), TexDecodeFlags{});
((LPDIRECT3DTEXTURE9)texture)->UnlockRect(dstLevel);
}
} else {
@ -322,7 +320,7 @@ void TextureCacheDX9::BuildTexture(TexCacheEntry *const entry) {
uint8_t *data = (uint8_t *)box.pBits;
int stride = box.RowPitch;
for (int i = 0; i < plan.depth; i++) {
LoadTextureLevel(*entry, data, stride, *plan.replaced, (i == 0) ? plan.baseLevelSrc : i, plan.scaleFactor, texFmt, false);
LoadTextureLevel(*entry, data, stride, *plan.replaced, (i == 0) ? plan.baseLevelSrc : i, plan.scaleFactor, FromD3D9Format(dstFmt), TexDecodeFlags{});
data += box.SlicePitch;
}
((LPDIRECT3DVOLUMETEXTURE9)texture)->UnlockBox(0);

View File

@ -325,7 +325,7 @@ void TextureCacheGLES::BuildTexture(TexCacheEntry *const entry) {
return;
}
LoadTextureLevel(*entry, data, stride, *plan.replaced, srcLevel, plan.scaleFactor, dstFmt, true);
LoadTextureLevel(*entry, data, stride, *plan.replaced, srcLevel, plan.scaleFactor, dstFmt, TexDecodeFlags::REVERSE_COLORS);
// NOTE: TextureImage takes ownership of data, so we don't free it afterwards.
render_->TextureImage(entry->textureName, i, mipWidth, mipHeight, 1, dstFmt, data, GLRAllocType::ALIGNED);
@ -344,7 +344,7 @@ void TextureCacheGLES::BuildTexture(TexCacheEntry *const entry) {
u8 *p = data;
for (int i = 0; i < plan.depth; i++) {
LoadTextureLevel(*entry, p, stride, *plan.replaced, i, plan.scaleFactor, dstFmt, true);
LoadTextureLevel(*entry, p, stride, *plan.replaced, i, plan.scaleFactor, dstFmt, TexDecodeFlags::REVERSE_COLORS);
p += levelStride;
}

View File

@ -57,22 +57,10 @@ inline static T VecClamp(const T &v, const T &low, const T &high)
}
template<typename T>
class Vec2
{
class Vec2 {
public:
union
{
struct
{
T x,y;
};
#if defined(_M_SSE)
__m128i ivec;
__m128 vec;
#elif PPSSPP_ARCH(ARM64_NEON)
int32x4_t ivec;
float32x4_t vec;
#endif
struct {
T x,y;
};
T* AsArray() { return &x; }
@ -81,15 +69,6 @@ public:
Vec2() {}
Vec2(const T a[2]) : x(a[0]), y(a[1]) {}
Vec2(const T& _x, const T& _y) : x(_x), y(_y) {}
#if defined(_M_SSE)
Vec2(const __m128 &_vec) : vec(_vec) {}
Vec2(const __m128i &_ivec) : ivec(_ivec) {}
#elif PPSSPP_ARCH(ARM64_NEON)
Vec2(const float32x4_t &_vec) : vec(_vec) {}
#if !defined(_MSC_VER)
Vec2(const int32x4_t &_ivec) : ivec(_ivec) {}
#endif
#endif
template<typename T2>
Vec2<T2> Cast() const

View File

@ -242,7 +242,7 @@ bool BinManager::HasTextureWrite(const RasterizerState &state) {
if (!state.enableTextures)
return false;
const int textureBits = textureBitsPerPixel[state.samplerID.texfmt];
const uint8_t textureBits = textureBitsPerPixel[state.samplerID.texfmt];
for (int i = 0; i <= state.maxTexLevel; ++i) {
int byteStride = (state.texbufw[i] * textureBits) / 8;
int byteWidth = (state.samplerID.cached.sizes[i].w * textureBits) / 8;

View File

@ -78,14 +78,6 @@ static inline Vec3<int> Interpolate(const Vec3<int> &c0, const Vec3<int> &c1, co
#endif
}
static inline Vec2<float> Interpolate(const Vec2<float> &c0, const Vec2<float> &c1, const Vec2<float> &c2, int w0, int w1, int w2, float wsum) {
#if defined(_M_SSE) && !PPSSPP_ARCH(X86)
return Vec2<float>(Interpolate(c0.vec, c1.vec, c2.vec, w0, w1, w2, wsum));
#else
return (c0 * w0 + c1 * w1 + c2 * w2) * wsum;
#endif
}
static inline Vec4<float> Interpolate(const float &c0, const float &c1, const float &c2, const Vec4<float> &w0, const Vec4<float> &w1, const Vec4<float> &w2, const Vec4<float> &wsum_recip) {
#if defined(_M_SSE) && !PPSSPP_ARCH(X86)
__m128 v = _mm_mul_ps(w0.vec, _mm_set1_ps(c0));
@ -124,7 +116,7 @@ void ComputeRasterizerState(RasterizerState *state, bool throughMode) {
for (uint8_t i = 0; i <= state->maxTexLevel; i++) {
u32 texaddr = gstate.getTextureAddress(i);
state->texaddr[i] = texaddr;
state->texbufw[i] = GetTextureBufw(i, texaddr, texfmt);
state->texbufw[i] = (uint16_t)GetTextureBufw(i, texaddr, texfmt);
if (Memory::IsValidAddress(texaddr))
state->texptr[i] = Memory::GetPointerUnchecked(texaddr);
else
@ -143,9 +135,6 @@ void ComputeRasterizerState(RasterizerState *state, bool throughMode) {
state->throughMode = throughMode;
state->antialiasLines = gstate.isAntiAliasEnabled();
state->screenOffsetX = gstate.getOffsetX16();
state->screenOffsetY = gstate.getOffsetY16();
#if defined(SOFTGPU_MEMORY_TAGGING_DETAILED) || defined(SOFTGPU_MEMORY_TAGGING_BASIC)
DisplayList currentList{};
if (gpuDebug)
@ -421,7 +410,7 @@ Vec3<int> AlphaBlendingResult(const PixelFuncID &pixelID, const Vec4<int> &sourc
static inline Vec4IntResult SOFTRAST_CALL ApplyTexturing(float s, float t, int x, int y, Vec4IntArg prim_color, int texlevel, int frac_texlevel, bool bilinear, const RasterizerState &state) {
const u8 **tptr0 = const_cast<const u8 **>(&state.texptr[texlevel]);
const int *bufw0 = &state.texbufw[texlevel];
const uint16_t *bufw0 = &state.texbufw[texlevel];
if (!bilinear) {
return state.nearest(s, t, x, y, prim_color, tptr0, bufw0, texlevel, frac_texlevel, state.samplerID);
@ -1476,7 +1465,7 @@ bool GetCurrentTexture(GPUDebugBuffer &buffer, int level)
GETextureFormat texfmt = gstate.getTextureFormat();
u32 texaddr = gstate.getTextureAddress(level);
int texbufw = GetTextureBufw(level, texaddr, texfmt);
u32 texbufw = GetTextureBufw(level, texaddr, texfmt);
int w = gstate.getTextureWidth(level);
int h = gstate.getTextureHeight(level);

View File

@ -39,11 +39,9 @@ struct RasterizerState {
Sampler::LinearFunc linear;
Sampler::NearestFunc nearest;
uint32_t texaddr[8]{};
int texbufw[8]{};
uint16_t texbufw[8]{};
const u8 *texptr[8]{};
float textureLodSlope;
int screenOffsetX;
int screenOffsetY;
struct {
uint8_t maxTexLevel : 3;

View File

@ -103,7 +103,7 @@ void DrawSprite(const VertexData &v0, const VertexData &v1, const BinCoords &ran
const u8 *texptr = state.texptr[0];
GETextureFormat texfmt = state.samplerID.TexFmt();
int texbufw = state.texbufw[0];
uint16_t texbufw = state.texbufw[0];
Sampler::FetchFunc fetchFunc = Sampler::GetFetchFunc(state.samplerID);
auto &pixelID = state.pixelID;
@ -300,9 +300,9 @@ bool RectangleFastPath(const VertexData &v0, const VertexData &v1, BinManager &b
if (PSP_CoreParameter().compat.flags().DarkStalkersPresentHack && v0.texturecoords.x == 64.0f && v0.texturecoords.y == 16.0f && v1.texturecoords.x == 448.0f && v1.texturecoords.y == 240.0f) {
// check for save/load dialog.
if (!currentDialogActive) {
if (v0.screenpos.x + state.screenOffsetX == 0x7100 && v0.screenpos.y + state.screenOffsetY == 0x7780 && v1.screenpos.x + state.screenOffsetX == 0x8f00 && v1.screenpos.y + state.screenOffsetY == 0x8880) {
if (v0.screenpos.x + gstate.getOffsetX16() == 0x7100 && v0.screenpos.y + gstate.getOffsetY16() == 0x7780 && v1.screenpos.x + gstate.getOffsetX16() == 0x8f00 && v1.screenpos.y + gstate.getOffsetY16() == 0x8880) {
g_DarkStalkerStretch = DSStretch::Wide;
} else if (v0.screenpos.x + state.screenOffsetX == 0x7400 && v0.screenpos.y + state.screenOffsetY == 0x7780 && v1.screenpos.x + state.screenOffsetX == 0x8C00 && v1.screenpos.y + state.screenOffsetY == 0x8880) {
} else if (v0.screenpos.x + gstate.getOffsetX16() == 0x7400 && v0.screenpos.y + gstate.getOffsetY16() == 0x7780 && v1.screenpos.x + gstate.getOffsetX16() == 0x8C00 && v1.screenpos.y + gstate.getOffsetY16() == 0x8880) {
g_DarkStalkerStretch = DSStretch::Normal;
} else {
return false;

View File

@ -38,8 +38,8 @@ using namespace Rasterizer;
namespace Sampler {
static Vec4IntResult SOFTRAST_CALL SampleNearest(float s, float t, int x, int y, Vec4IntArg prim_color, const u8 *const *tptr, const int *bufw, int level, int levelFrac, const SamplerID &samplerID);
static Vec4IntResult SOFTRAST_CALL SampleLinear(float s, float t, int x, int y, Vec4IntArg prim_color, const u8 *const *tptr, const int *bufw, int level, int levelFrac, const SamplerID &samplerID);
static Vec4IntResult SOFTRAST_CALL SampleNearest(float s, float t, int x, int y, Vec4IntArg prim_color, const u8 *const *tptr, const uint16_t *bufw, int level, int levelFrac, const SamplerID &samplerID);
static Vec4IntResult SOFTRAST_CALL SampleLinear(float s, float t, int x, int y, Vec4IntArg prim_color, const u8 *const *tptr, const uint16_t *bufw, int level, int levelFrac, const SamplerID &samplerID);
static Vec4IntResult SOFTRAST_CALL SampleFetch(int u, int v, const u8 *tptr, int bufw, int level, const SamplerID &samplerID);
std::mutex jitCacheLock;
@ -281,7 +281,7 @@ struct Nearest4 {
};
template <int N>
inline static Nearest4 SOFTRAST_CALL SampleNearest(const int u[N], const int v[N], const u8 *srcptr, int texbufw, int level, const SamplerID &samplerID) {
inline static Nearest4 SOFTRAST_CALL SampleNearest(const int u[N], const int v[N], const u8 *srcptr, uint16_t texbufw, int level, const SamplerID &samplerID) {
Nearest4 res;
if (!srcptr) {
memset(res.v, 0, sizeof(res.v));
@ -535,7 +535,7 @@ Vec4IntResult SOFTRAST_CALL GetTextureFunctionOutput(Vec4IntArg prim_color_in, V
return ToVec4IntResult(Vec4<int>(out_rgb, out_a));
}
static Vec4IntResult SOFTRAST_CALL SampleNearest(float s, float t, int x, int y, Vec4IntArg prim_color, const u8 *const *tptr, const int *bufw, int level, int levelFrac, const SamplerID &samplerID) {
static Vec4IntResult SOFTRAST_CALL SampleNearest(float s, float t, int x, int y, Vec4IntArg prim_color, const u8 *const *tptr, const uint16_t *bufw, int level, int levelFrac, const SamplerID &samplerID) {
int u, v;
// Nearest filtering only. Round texcoords.
@ -631,7 +631,7 @@ static inline Vec4IntResult SOFTRAST_CALL GetTexelCoordinatesQuadT(int level, fl
return ApplyTexelClampQuadT(samplerID.clampT, base_v, height);
}
static Vec4IntResult SOFTRAST_CALL SampleLinearLevel(float s, float t, int x, int y, const u8 *const *tptr, const int *bufw, int texlevel, const SamplerID &samplerID) {
static Vec4IntResult SOFTRAST_CALL SampleLinearLevel(float s, float t, int x, int y, const u8 *const *tptr, const uint16_t *bufw, int texlevel, const SamplerID &samplerID) {
int frac_u, frac_v;
const Vec4<int> u = GetTexelCoordinatesQuadS(texlevel, s, frac_u, x, samplerID);
const Vec4<int> v = GetTexelCoordinatesQuadT(texlevel, t, frac_v, y, samplerID);
@ -646,7 +646,7 @@ static Vec4IntResult SOFTRAST_CALL SampleLinearLevel(float s, float t, int x, in
return ToVec4IntResult((top * (0x10 - frac_v) + bot * frac_v) / (16 * 16));
}
static Vec4IntResult SOFTRAST_CALL SampleLinear(float s, float t, int x, int y, Vec4IntArg prim_color, const u8 *const *tptr, const int *bufw, int texlevel, int levelFrac, const SamplerID &samplerID) {
static Vec4IntResult SOFTRAST_CALL SampleLinear(float s, float t, int x, int y, Vec4IntArg prim_color, const u8 *const *tptr, const uint16_t *bufw, int texlevel, int levelFrac, const SamplerID &samplerID) {
Vec4<int> c0 = SampleLinearLevel(s, t, x, y, tptr, bufw, texlevel, samplerID);
if (levelFrac) {
const Vec4<int> c1 = SampleLinearLevel(s, t, x, y, tptr + 1, bufw + 1, texlevel + 1, samplerID);

View File

@ -36,10 +36,10 @@ namespace Sampler {
typedef Rasterizer::Vec4IntResult(SOFTRAST_CALL *FetchFunc)(int u, int v, const u8 *tptr, int bufw, int level, const SamplerID &samplerID);
FetchFunc GetFetchFunc(SamplerID id);
typedef Rasterizer::Vec4IntResult (SOFTRAST_CALL *NearestFunc)(float s, float t, int x, int y, Rasterizer::Vec4IntArg prim_color, const u8 *const *tptr, const int *bufw, int level, int levelFrac, const SamplerID &samplerID);
typedef Rasterizer::Vec4IntResult (SOFTRAST_CALL *NearestFunc)(float s, float t, int x, int y, Rasterizer::Vec4IntArg prim_color, const u8 *const *tptr, const uint16_t *bufw, int level, int levelFrac, const SamplerID &samplerID);
NearestFunc GetNearestFunc(SamplerID id);
typedef Rasterizer::Vec4IntResult (SOFTRAST_CALL *LinearFunc)(float s, float t, int x, int y, Rasterizer::Vec4IntArg prim_color, const u8 *const *tptr, const int *bufw, int level, int levelFrac, const SamplerID &samplerID);
typedef Rasterizer::Vec4IntResult (SOFTRAST_CALL *LinearFunc)(float s, float t, int x, int y, Rasterizer::Vec4IntArg prim_color, const u8 *const *tptr, const uint16_t *bufw, int level, int levelFrac, const SamplerID &samplerID);
LinearFunc GetLinearFunc(SamplerID id);
void Init();

View File

@ -246,7 +246,7 @@ NearestFunc SamplerJitCache::CompileNearest(const SamplerID &id) {
auto loadPtrs = [&](bool level1) {
X64Reg bufwReg = regCache_.Alloc(RegCache::GEN_ARG_BUFW);
X64Reg bufwPtrReg = regCache_.Find(RegCache::GEN_ARG_BUFW_PTR);
MOV(32, R(bufwReg), MDisp(bufwPtrReg, level1 ? 4 : 0));
MOVZX(32, 16, bufwReg, MDisp(bufwPtrReg, level1 ? 2 : 0));
regCache_.Unlock(bufwPtrReg, RegCache::GEN_ARG_BUFW_PTR);
regCache_.Unlock(bufwReg, RegCache::GEN_ARG_BUFW);
regCache_.ForceRetain(RegCache::GEN_ARG_BUFW);
@ -713,7 +713,7 @@ LinearFunc SamplerJitCache::CompileLinear(const SamplerID &id) {
X64Reg srcReg = regCache_.Find(RegCache::GEN_ARG_TEXPTR_PTR);
X64Reg bufwReg = regCache_.Find(RegCache::GEN_ARG_BUFW_PTR);
ADD(64, R(srcArgReg), MDisp(srcReg, level1 ? 8 : 0));
MOV(32, R(bufwArgReg), MDisp(bufwReg, level1 ? 4 : 0));
MOVZX(32, 16, bufwArgReg, MDisp(bufwReg, level1 ? 2 : 0));
// Leave level/levelFrac, we just always load from RAM on Windows and lock on POSIX.
regCache_.Unlock(srcReg, RegCache::GEN_ARG_TEXPTR_PTR);
regCache_.Unlock(bufwReg, RegCache::GEN_ARG_BUFW_PTR);
@ -2995,12 +2995,13 @@ bool SamplerJitCache::Jit_PrepareDataDirectOffsets(const SamplerID &id, RegCache
if (!id.useStandardBufw || id.hasAnyMips) {
// Spread bufw into each lane.
X64Reg bufwReg = regCache_.Find(RegCache::GEN_ARG_BUFW_PTR);
if (cpu_info.bAVX2) {
VPBROADCASTD(128, bufwVecReg, MDisp(bufwReg, level1 ? 4 : 0));
if (cpu_info.bSSE4_1) {
PMOVZXWD(bufwVecReg, MDisp(bufwReg, level1 ? 2 : 0));
} else {
MOVD_xmm(bufwVecReg, MDisp(bufwReg, level1 ? 4 : 0));
PSHUFD(bufwVecReg, R(bufwVecReg), _MM_SHUFFLE(0, 0, 0, 0));
PXOR(bufwVecReg, R(bufwVecReg));
PINSRW(bufwVecReg, MDisp(bufwReg, level1 ? 2 : 0), 0);
}
PSHUFD(bufwVecReg, R(bufwVecReg), _MM_SHUFFLE(0, 0, 0, 0));
regCache_.Unlock(bufwReg, RegCache::GEN_ARG_BUFW_PTR);
if (bitsPerTexel == 4)
@ -3070,12 +3071,13 @@ bool SamplerJitCache::Jit_PrepareDataSwizzledOffsets(const SamplerID &id, RegCac
if (!id.useStandardBufw || id.hasAnyMips) {
// Spread bufw into each lane.
X64Reg bufwReg = regCache_.Find(RegCache::GEN_ARG_BUFW_PTR);
if (cpu_info.bAVX2) {
VPBROADCASTD(128, bufwVecReg, MDisp(bufwReg, level1 ? 4 : 0));
if (cpu_info.bSSE4_1) {
PMOVZXWD(bufwVecReg, MDisp(bufwReg, level1 ? 2 : 0));
} else {
MOVD_xmm(bufwVecReg, MDisp(bufwReg, level1 ? 4 : 0));
PSHUFD(bufwVecReg, R(bufwVecReg), _MM_SHUFFLE(0, 0, 0, 0));
PXOR(bufwVecReg, R(bufwVecReg));
PINSRW(bufwVecReg, MDisp(bufwReg, level1 ? 2 : 0), 0);
}
PSHUFD(bufwVecReg, R(bufwVecReg), _MM_SHUFFLE(0, 0, 0, 0));
regCache_.Unlock(bufwReg, RegCache::GEN_ARG_BUFW_PTR);
}
@ -3162,12 +3164,13 @@ bool SamplerJitCache::Jit_PrepareDataDXTOffsets(const SamplerID &id, Rasterizer:
if (!id.useStandardBufw || id.hasAnyMips) {
// Spread bufw into each lane.
X64Reg bufwReg = regCache_.Find(RegCache::GEN_ARG_BUFW_PTR);
if (cpu_info.bAVX2) {
VPBROADCASTD(128, bufwVecReg, MDisp(bufwReg, level1 ? 4 : 0));
if (cpu_info.bSSE4_1) {
PMOVZXWD(bufwVecReg, MDisp(bufwReg, level1 ? 2 : 0));
} else {
MOVD_xmm(bufwVecReg, MDisp(bufwReg, level1 ? 4 : 0));
PSHUFD(bufwVecReg, R(bufwVecReg), _MM_SHUFFLE(0, 0, 0, 0));
PXOR(bufwVecReg, R(bufwVecReg));
PINSRW(bufwVecReg, MDisp(bufwReg, level1 ? 2 : 0), 0);
}
PSHUFD(bufwVecReg, R(bufwVecReg), _MM_SHUFFLE(0, 0, 0, 0));
regCache_.Unlock(bufwReg, RegCache::GEN_ARG_BUFW_PTR);
// Divide by 4 before the multiply.

View File

@ -164,6 +164,15 @@ public:
TransformUnit transformUnit;
#if PPSSPP_ARCH(32BIT)
// Class-specific allocation, only compiled when PPSSPP_ARCH(32BIT) is set
// (see the surrounding #if). Storage is requested from AllocateAlignedMemory
// with 16 as its second argument.
// NOTE(review): 16 is presumably the byte alignment - confirm against
// AllocateAlignedMemory's declaration.
void *operator new(size_t s) {
	const int alignment = 16;
	void *storage = AllocateAlignedMemory(s, alignment);
	return storage;
}
// Class-specific deallocation paired with the operator new declared just above:
// storage obtained through AllocateAlignedMemory is handed back to
// FreeAlignedMemory instead of the default global deallocation function.
void operator delete(void *p) {
FreeAlignedMemory(p);
}
#endif
protected:
// Override of the hardware-tessellation toggle hook. This implementation
// ignores `enable` and unconditionally reports false.
bool UpdateUseHWTessellation(bool enable) override {
	return false;
}
};

View File

@ -44,7 +44,8 @@
// Most drivers treat vkCreateShaderModule as pretty much a memcpy. What actually
// takes time here, and makes this worthy of parallelization, is GLSLtoSPV.
Promise<VkShaderModule> *CompileShaderModuleAsync(VulkanContext *vulkan, VkShaderStageFlagBits stage, const char *code) {
// Takes ownership over tag.
static Promise<VkShaderModule> *CompileShaderModuleAsync(VulkanContext *vulkan, VkShaderStageFlagBits stage, const char *code, std::string *tag) {
auto compile = [=] {
PROFILE_THIS_SCOPE("shadercomp");
@ -59,12 +60,13 @@ Promise<VkShaderModule> *CompileShaderModuleAsync(VulkanContext *vulkan, VkShade
} else {
ERROR_LOG(G3D, "Error in shader compilation!");
}
std::string numberedSource = LineNumberString(code);
ERROR_LOG(G3D, "Messages: %s", errorMessage.c_str());
ERROR_LOG(G3D, "Shader source:\n%s", code);
#ifdef SHADERLOG
OutputDebugStringA(LineNumberString(code).c_str());
ERROR_LOG(G3D, "Shader source:\n%s", numberedSource.c_str());
#if PPSSPP_PLATFORM(WINDOWS)
OutputDebugStringA("Error messages:\n");
OutputDebugStringA(errorMessage.c_str());
OutputDebugStringA(numberedSource.c_str());
#endif
Reporting::ReportMessage("Vulkan error in shader compilation: info: %s / code: %s", errorMessage.c_str(), code);
}
@ -75,6 +77,10 @@ Promise<VkShaderModule> *CompileShaderModuleAsync(VulkanContext *vulkan, VkShade
#ifdef SHADERLOG
OutputDebugStringA("OK");
#endif
if (tag) {
vulkan->SetDebugName(shaderModule, VK_OBJECT_TYPE_SHADER_MODULE, tag->c_str());
delete tag;
}
}
return shaderModule;
@ -92,7 +98,7 @@ Promise<VkShaderModule> *CompileShaderModuleAsync(VulkanContext *vulkan, VkShade
VulkanFragmentShader::VulkanFragmentShader(VulkanContext *vulkan, FShaderID id, FragmentShaderFlags flags, const char *code)
: vulkan_(vulkan), id_(id), flags_(flags) {
source_ = code;
module_ = CompileShaderModuleAsync(vulkan, VK_SHADER_STAGE_FRAGMENT_BIT, source_.c_str());
module_ = CompileShaderModuleAsync(vulkan, VK_SHADER_STAGE_FRAGMENT_BIT, source_.c_str(), new std::string(FragmentShaderDesc(id)));
if (!module_) {
failed_ = true;
} else {
@ -122,7 +128,7 @@ std::string VulkanFragmentShader::GetShaderString(DebugShaderStringType type) co
VulkanVertexShader::VulkanVertexShader(VulkanContext *vulkan, VShaderID id, const char *code, bool useHWTransform)
: vulkan_(vulkan), useHWTransform_(useHWTransform), id_(id) {
source_ = code;
module_ = CompileShaderModuleAsync(vulkan, VK_SHADER_STAGE_VERTEX_BIT, source_.c_str());
module_ = CompileShaderModuleAsync(vulkan, VK_SHADER_STAGE_VERTEX_BIT, source_.c_str(), new std::string(VertexShaderDesc(id).c_str()));
if (!module_) {
failed_ = true;
} else {

View File

@ -728,7 +728,10 @@ void TextureCacheVulkan::LoadTextureLevel(TexCacheEntry &entry, uint8_t *writePt
u32 *pixelData;
int decPitch;
bool expand32 = !gstate_c.Supports(GPU_SUPPORTS_16BIT_FORMATS) || scaleFactor > 1 || dstFmt == VULKAN_8888_FORMAT;
TexDecodeFlags texDecFlags{};
if (!gstate_c.Supports(GPU_SUPPORTS_16BIT_FORMATS) || scaleFactor > 1 || dstFmt == VULKAN_8888_FORMAT) {
texDecFlags |= TexDecodeFlags::EXPAND32;
}
if (scaleFactor > 1) {
tmpTexBufRearrange_.resize(std::max(bufw, w) * h);
@ -740,7 +743,7 @@ void TextureCacheVulkan::LoadTextureLevel(TexCacheEntry &entry, uint8_t *writePt
decPitch = rowPitch;
}
CheckAlphaResult alphaResult = DecodeTextureLevel((u8 *)pixelData, decPitch, tfmt, clutformat, texaddr, level, bufw, false, expand32);
CheckAlphaResult alphaResult = DecodeTextureLevel((u8 *)pixelData, decPitch, tfmt, clutformat, texaddr, level, bufw, texDecFlags);
entry.SetAlphaStatus(alphaResult, level);
if (scaleFactor > 1) {

View File

@ -374,8 +374,7 @@ enum GEMatrixType {
GE_MTX_TEXGEN,
};
enum GEComparison
{
enum GEComparison : uint8_t {
GE_COMP_NEVER = 0,
GE_COMP_ALWAYS = 1,
GE_COMP_EQUAL = 2,
@ -578,8 +577,7 @@ enum GEPrimitiveType
GE_PRIM_INVALID = -1,
};
enum GELogicOp
{
enum GELogicOp : uint8_t {
GE_LOGIC_CLEAR = 0,
GE_LOGIC_AND = 1,
GE_LOGIC_AND_REVERSE = 2,

View File

@ -251,7 +251,7 @@ private:
return nullptr;
}
const auto recentIsos = g_Config.RecentIsos();
if (index >= recentIsos.size())
if (index >= (int)recentIsos.size())
return nullptr;
return g_gameInfoCache->GetInfo(dc.GetDrawContext(), Path(recentIsos[index]), GAMEINFO_WANTBG);
}

View File

@ -48,7 +48,7 @@ static bool TestSamplerJit() {
bool header = false;
u8 **tptr = new u8 *[8];
int *bufw = new int[8];
uint16_t *bufw = new uint16_t[8];
u8 *clut = new u8[1024];
memset(clut, 0, 1024);