mirror of
https://github.com/hrydgard/ppsspp.git
synced 2024-11-24 14:00:03 +00:00
Merge branch 'master' into feature_openxr_vulkan
This commit is contained in:
commit
f98381fb57
@ -47,12 +47,12 @@ struct TinySet {
|
||||
size_t otherSize = other.size();
|
||||
if (size() + otherSize <= MaxFastSize) {
|
||||
// Fast case
|
||||
for (int i = 0; i < otherSize; i++) {
|
||||
for (size_t i = 0; i < otherSize; i++) {
|
||||
fastLookup_[fastCount + i] = other.fastLookup_[i];
|
||||
}
|
||||
fastCount += other.fastCount;
|
||||
} else {
|
||||
for (int i = 0; i < otherSize; i++) {
|
||||
for (size_t i = 0; i < otherSize; i++) {
|
||||
push_back(other[i]);
|
||||
}
|
||||
}
|
||||
|
@ -1219,7 +1219,7 @@ bool OpenGLPipeline::LinkShaders() {
|
||||
}
|
||||
std::vector<GLRProgram::Initializer> initialize;
|
||||
for (int i = 0; i < MAX_TEXTURE_SLOTS; ++i) {
|
||||
if (i < samplers_.size()) {
|
||||
if (i < (int)samplers_.size()) {
|
||||
initialize.push_back({ &samplerLocs_[i], 0, i });
|
||||
} else {
|
||||
samplerLocs_[i] = -1;
|
||||
|
@ -1086,7 +1086,7 @@ Pipeline *VKContext::CreateGraphicsPipeline(const PipelineDesc &desc, const char
|
||||
_dbg_assert_((int)input->attributes.size() == (int)input->visc.vertexAttributeDescriptionCount);
|
||||
|
||||
gDesc.ibd = input->bindings[0];
|
||||
for (int i = 0; i < input->attributes.size(); i++) {
|
||||
for (size_t i = 0; i < input->attributes.size(); i++) {
|
||||
gDesc.attrs[i] = input->attributes[i];
|
||||
}
|
||||
gDesc.vis.vertexAttributeDescriptionCount = input->visc.vertexAttributeDescriptionCount;
|
||||
|
@ -1149,7 +1149,9 @@ void PSPSaveDialog::ExecuteNotVisibleIOAction() {
|
||||
case SCE_UTILITY_SAVEDATA_TYPE_READDATA:
|
||||
case SCE_UTILITY_SAVEDATA_TYPE_READDATASECURE:
|
||||
result = param.Load(param.GetPspParam(), GetSelectedSaveDirName(), currentSelectedSave, param.GetPspParam()->mode == SCE_UTILITY_SAVEDATA_TYPE_READDATASECURE);
|
||||
if(result == SCE_UTILITY_SAVEDATA_ERROR_LOAD_NO_DATA)
|
||||
if (result == SCE_UTILITY_SAVEDATA_ERROR_LOAD_DATA_BROKEN)
|
||||
result = SCE_UTILITY_SAVEDATA_ERROR_RW_DATA_BROKEN;
|
||||
if (result == SCE_UTILITY_SAVEDATA_ERROR_LOAD_NO_DATA)
|
||||
result = SCE_UTILITY_SAVEDATA_ERROR_RW_NO_DATA;
|
||||
break;
|
||||
case SCE_UTILITY_SAVEDATA_TYPE_ERASE:
|
||||
|
@ -589,15 +589,22 @@ int SavedataParam::Load(SceUtilitySavedataParam *param, const std::string &saveD
|
||||
return isRWMode ? SCE_UTILITY_SAVEDATA_ERROR_RW_NO_DATA : SCE_UTILITY_SAVEDATA_ERROR_LOAD_NO_DATA;
|
||||
}
|
||||
|
||||
if (fileName != "" && !pspFileSystem.GetFileInfo(filePath).exists) {
|
||||
return isRWMode ? SCE_UTILITY_SAVEDATA_ERROR_RW_FILE_NOT_FOUND : SCE_UTILITY_SAVEDATA_ERROR_LOAD_FILE_NOT_FOUND;
|
||||
}
|
||||
|
||||
// If it wasn't zero, force to zero before loading and especially in case of error.
|
||||
// This isn't reset if the path doesn't even exist.
|
||||
param->dataSize = 0;
|
||||
int result = LoadSaveData(param, saveDirName, dirPath, secureMode);
|
||||
if (result != 0)
|
||||
return result;
|
||||
|
||||
// Load sfo
|
||||
if (!LoadSFO(param, dirPath)) {
|
||||
return isRWMode ? SCE_UTILITY_SAVEDATA_ERROR_RW_DATA_BROKEN : SCE_UTILITY_SAVEDATA_ERROR_LOAD_DATA_BROKEN;
|
||||
}
|
||||
|
||||
if (fileName != "" && !pspFileSystem.GetFileInfo(filePath).exists) {
|
||||
return isRWMode ? SCE_UTILITY_SAVEDATA_ERROR_RW_FILE_NOT_FOUND : SCE_UTILITY_SAVEDATA_ERROR_LOAD_FILE_NOT_FOUND;
|
||||
}
|
||||
|
||||
// Don't know what it is, but PSP always respond this and this unlock some game
|
||||
param->bind = 1021;
|
||||
|
||||
@ -612,15 +619,6 @@ int SavedataParam::Load(SceUtilitySavedataParam *param, const std::string &saveD
|
||||
// Load SND0.AT3
|
||||
LoadFile(dirPath, SND0_FILENAME, &param->snd0FileData);
|
||||
|
||||
if (fileName == "") {
|
||||
// Don't load savedata but return success.
|
||||
return 0;
|
||||
}
|
||||
|
||||
int result = LoadSaveData(param, saveDirName, dirPath, secureMode);
|
||||
if (result != 0)
|
||||
return result;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -638,6 +636,10 @@ int SavedataParam::LoadSaveData(SceUtilitySavedataParam *param, const std::strin
|
||||
|
||||
std::string filename = GetFileName(param);
|
||||
std::string filePath = dirPath + "/" + filename;
|
||||
// Blank filename always means success, if secureVersion was correct.
|
||||
if (filename == "")
|
||||
return 0;
|
||||
|
||||
s64 readSize;
|
||||
INFO_LOG(SCEUTILITY, "Loading file with size %u in %s", param->dataBufSize, filePath.c_str());
|
||||
u8 *saveData = nullptr;
|
||||
@ -667,14 +669,18 @@ int SavedataParam::LoadSaveData(SceUtilitySavedataParam *param, const std::strin
|
||||
if (!saveDone) {
|
||||
loadedSize = LoadNotCryptedSave(param, param->dataBuf, saveData, saveSize);
|
||||
}
|
||||
param->dataSize = (SceSize)saveSize;
|
||||
delete[] saveData;
|
||||
|
||||
if (loadedSize != 0) {
|
||||
// Ignore error codes.
|
||||
if (loadedSize != 0 && (loadedSize & 0x80000000) == 0) {
|
||||
std::string tag = "LoadSaveData/" + filePath;
|
||||
NotifyMemInfo(MemBlockFlags::WRITE, param->dataBuf.ptr, loadedSize, tag.c_str(), tag.size());
|
||||
}
|
||||
|
||||
if ((loadedSize & 0x80000000) != 0)
|
||||
return loadedSize;
|
||||
|
||||
param->dataSize = (SceSize)saveSize;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -760,8 +766,12 @@ u32 SavedataParam::LoadCryptedSave(SceUtilitySavedataParam *param, u8 *data, con
|
||||
u32 sz = 0;
|
||||
if (err == 0) {
|
||||
if (param->dataBuf.IsValid()) {
|
||||
sz = std::min((u32)saveSize, (u32)param->dataBufSize);
|
||||
memcpy(data, data_base, sz);
|
||||
if ((u32)saveSize > param->dataBufSize || !Memory::IsValidRange(param->dataBuf.ptr, saveSize)) {
|
||||
sz = SCE_UTILITY_SAVEDATA_ERROR_LOAD_DATA_BROKEN;
|
||||
} else {
|
||||
sz = (u32)saveSize;
|
||||
memcpy(data, data_base, sz);
|
||||
}
|
||||
}
|
||||
saveDone = true;
|
||||
}
|
||||
@ -773,9 +783,11 @@ u32 SavedataParam::LoadCryptedSave(SceUtilitySavedataParam *param, u8 *data, con
|
||||
|
||||
u32 SavedataParam::LoadNotCryptedSave(SceUtilitySavedataParam *param, u8 *data, u8 *saveData, int &saveSize) {
|
||||
if (param->dataBuf.IsValid()) {
|
||||
u32 sz = std::min((u32)saveSize, (u32)param->dataBufSize);
|
||||
memcpy(data, saveData, sz);
|
||||
return sz;
|
||||
if ((u32)saveSize > param->dataBufSize || !Memory::IsValidRange(param->dataBuf.ptr, saveSize)) {
|
||||
return SCE_UTILITY_SAVEDATA_ERROR_LOAD_DATA_BROKEN;
|
||||
}
|
||||
memcpy(data, saveData, saveSize);
|
||||
return saveSize;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
@ -132,10 +132,7 @@ void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config) {
|
||||
break;
|
||||
}
|
||||
|
||||
float texturePixels = 256.0f;
|
||||
if (config.clutFormat != GE_CMODE_32BIT_ABGR8888) {
|
||||
texturePixels = 512.0f;
|
||||
}
|
||||
float texturePixels = 512.0f;
|
||||
|
||||
if (shift) {
|
||||
writer.F(" index = (int(uint(index) >> uint(%d)) & 0x%02x)", shift, mask);
|
||||
@ -278,11 +275,9 @@ void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config) {
|
||||
break;
|
||||
}
|
||||
|
||||
float texturePixels = 256.f;
|
||||
if (config.clutFormat != GE_CMODE_32BIT_ABGR8888) {
|
||||
texturePixels = 512.f;
|
||||
index_multiplier *= 0.5f;
|
||||
}
|
||||
// We always use 512-sized textures now.
|
||||
float texturePixels = 512.f;
|
||||
index_multiplier *= 0.5f;
|
||||
|
||||
// Adjust index_multiplier, similar to the use of 15.99 instead of 16 in the ES 3 path.
|
||||
// index_multiplier -= 0.01f / texturePixels;
|
||||
@ -326,11 +321,7 @@ void GenerateDepalSmoothed(ShaderWriter &writer, const DepalConfig &config) {
|
||||
}
|
||||
|
||||
writer.C(" float index = ").SampleTexture2D("tex", "v_texcoord").F(".%s * %0.1f;\n", sourceChannel, indexMultiplier);
|
||||
float texturePixels = 256.f;
|
||||
if (config.clutFormat != GE_CMODE_32BIT_ABGR8888) {
|
||||
texturePixels = 512.f;
|
||||
}
|
||||
|
||||
float texturePixels = 512.f;
|
||||
writer.F(" float coord = (index + 0.5) * %f;\n", 1.0 / texturePixels);
|
||||
writer.C(" vec4 outColor = ").SampleTexture2D("pal", "vec2(coord, 0.0)").C(";\n");
|
||||
}
|
||||
|
@ -640,7 +640,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
|
||||
p.C(" if (depalShift == 5u) { index0 = t.g; }\n");
|
||||
p.C(" else if (depalShift == 10u) { index0 = t.b; }\n");
|
||||
p.C(" }\n");
|
||||
p.F(" t = ").SampleTexture2D("pal", "vec2(index0 * factor, 0.0)").C(";\n");
|
||||
p.F(" t = ").SampleTexture2D("pal", "vec2(index0 * factor * 0.5, 0.0)").C(";\n"); // 0.5 for 512-entry CLUT.
|
||||
break;
|
||||
case ShaderDepalMode::NORMAL:
|
||||
if (doTextureProjection) {
|
||||
|
@ -666,6 +666,10 @@ static const char *reinterpretStrings[4][4] = {
|
||||
|
||||
// Call this after the target has been bound for rendering. For color, raster is probably always going to win over blits/copies.
|
||||
void FramebufferManagerCommon::CopyToColorFromOverlappingFramebuffers(VirtualFramebuffer *dst) {
|
||||
if (!useBufferedRendering_) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<CopySource> sources;
|
||||
for (auto src : vfbs_) {
|
||||
// Discard old and equal potential inputs.
|
||||
@ -773,30 +777,17 @@ void FramebufferManagerCommon::CopyToColorFromOverlappingFramebuffers(VirtualFra
|
||||
WARN_LOG_ONCE(bta, G3D, "WARNING: Reinterpret encountered with BlueToAlpha on");
|
||||
}
|
||||
|
||||
if (IsBufferFormat16Bit(src->fb_format) && !IsBufferFormat16Bit(dst->fb_format)) {
|
||||
// We halve the X coordinates in the destination framebuffer.
|
||||
// The shader will collect two pixels worth of input data and merge into one.
|
||||
dstX1 *= 0.5f;
|
||||
dstX2 *= 0.5f;
|
||||
} else if (!IsBufferFormat16Bit(src->fb_format) && IsBufferFormat16Bit(dst->fb_format)) {
|
||||
// We double the X coordinates in the destination framebuffer.
|
||||
// The shader will sample and depending on the X coordinate & 1, use the upper or lower bits.
|
||||
dstX1 *= 2.0f;
|
||||
dstX2 *= 2.0f;
|
||||
}
|
||||
|
||||
// Reinterpret!
|
||||
WARN_LOG_N_TIMES(reint, 5, G3D, "Reinterpret detected from %08x_%s to %08x_%s",
|
||||
src->fb_address, GeBufferFormatToString(src->fb_format),
|
||||
dst->fb_address, GeBufferFormatToString(dst->fb_format));
|
||||
pipeline = reinterpretFromTo_[(int)src->fb_format][(int)dst->fb_format];
|
||||
|
||||
float scaleFactorX = 1.0f;
|
||||
pipeline = GetReinterpretPipeline(src->fb_format, dst->fb_format, &scaleFactorX);
|
||||
dstX1 *= scaleFactorX;
|
||||
dstX2 *= scaleFactorX;
|
||||
|
||||
pass_name = reinterpretStrings[(int)src->fb_format][(int)dst->fb_format];
|
||||
if (!pipeline) {
|
||||
pipeline = draw2D_.Create2DPipeline([=](ShaderWriter &shaderWriter) -> Draw2DPipelineInfo {
|
||||
return GenerateReinterpretFragmentShader(shaderWriter, src->fb_format, dst->fb_format);
|
||||
});
|
||||
reinterpretFromTo_[(int)src->fb_format][(int)dst->fb_format] = pipeline;
|
||||
}
|
||||
|
||||
gpuStats.numReinterpretCopies++;
|
||||
}
|
||||
@ -819,6 +810,27 @@ void FramebufferManagerCommon::CopyToColorFromOverlappingFramebuffers(VirtualFra
|
||||
textureCache_->ForgetLastTexture();
|
||||
}
|
||||
|
||||
Draw2DPipeline *FramebufferManagerCommon::GetReinterpretPipeline(GEBufferFormat from, GEBufferFormat to, float *scaleFactorX) {
|
||||
if (IsBufferFormat16Bit(from) && !IsBufferFormat16Bit(to)) {
|
||||
// We halve the X coordinates in the destination framebuffer.
|
||||
// The shader will collect two pixels worth of input data and merge into one.
|
||||
*scaleFactorX = 0.5f;
|
||||
} else if (!IsBufferFormat16Bit(from) && IsBufferFormat16Bit(to)) {
|
||||
// We double the X coordinates in the destination framebuffer.
|
||||
// The shader will sample and depending on the X coordinate & 1, use the upper or lower bits.
|
||||
*scaleFactorX = 2.0f;
|
||||
}
|
||||
|
||||
Draw2DPipeline *pipeline = reinterpretFromTo_[(int)from][(int)to];
|
||||
if (!pipeline) {
|
||||
pipeline = draw2D_.Create2DPipeline([=](ShaderWriter &shaderWriter) -> Draw2DPipelineInfo {
|
||||
return GenerateReinterpretFragmentShader(shaderWriter, from, to);
|
||||
});
|
||||
reinterpretFromTo_[(int)from][(int)to] = pipeline;
|
||||
}
|
||||
return pipeline;
|
||||
}
|
||||
|
||||
void FramebufferManagerCommon::DestroyFramebuf(VirtualFramebuffer *v) {
|
||||
// Notify the texture cache of both the color and depth buffers.
|
||||
textureCache_->NotifyFramebuffer(v, NOTIFY_FB_DESTROYED);
|
||||
@ -1069,7 +1081,7 @@ bool FramebufferManagerCommon::BindFramebufferAsColorTexture(int stage, VirtualF
|
||||
|
||||
// currentRenderVfb_ will always be set when this is called, except from the GE debugger.
|
||||
// Let's just not bother with the copy in that case.
|
||||
bool skipCopy = !(flags & BINDFBCOLOR_MAY_COPY) || GPUStepping::IsStepping();
|
||||
bool skipCopy = !(flags & BINDFBCOLOR_MAY_COPY);
|
||||
|
||||
// Currently rendering to this framebuffer. Need to make a copy.
|
||||
if (!skipCopy && framebuffer == currentRenderVfb_) {
|
||||
|
@ -421,14 +421,15 @@ public:
|
||||
// Returns the resolved framebuffer.
|
||||
VirtualFramebuffer *ResolveFramebufferColorToFormat(VirtualFramebuffer *vfb, GEBufferFormat newFormat);
|
||||
|
||||
Draw2DPipeline *Get2DPipeline(Draw2DShader shader);
|
||||
Draw2DPipeline *GetReinterpretPipeline(GEBufferFormat from, GEBufferFormat to, float *scaleFactorX);
|
||||
|
||||
protected:
|
||||
virtual void PackFramebufferSync(VirtualFramebuffer *vfb, int x, int y, int w, int h, RasterChannel channel);
|
||||
void SetViewport2D(int x, int y, int w, int h);
|
||||
Draw::Texture *MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height);
|
||||
void DrawActiveTexture(float x, float y, float w, float h, float destW, float destH, float u0, float v0, float u1, float v1, int uvRotation, int flags);
|
||||
|
||||
Draw2DPipeline *Get2DPipeline(Draw2DShader shader);
|
||||
|
||||
void CopyToColorFromOverlappingFramebuffers(VirtualFramebuffer *dest);
|
||||
void CopyToDepthFromOverlappingFramebuffers(VirtualFramebuffer *dest);
|
||||
|
||||
|
@ -856,7 +856,7 @@ static inline bool blendColorSimilar(uint32_t a, uint32_t b, int margin = 25) {
|
||||
// Try to simulate some common logic ops by using blend, if needed.
|
||||
// The shader might also need modification, the below function SimulateLogicOpShaderTypeIfNeeded
|
||||
// takes care of that.
|
||||
static void SimulateLogicOpIfNeeded(BlendFactor &srcBlend, BlendFactor &dstBlend, BlendEq &blendEq) {
|
||||
static bool SimulateLogicOpIfNeeded(BlendFactor &srcBlend, BlendFactor &dstBlend, BlendEq &blendEq) {
|
||||
// Note: our shader solution applies logic ops BEFORE blending, not correctly after.
|
||||
// This is however fine for the most common ones, like CLEAR/NOOP/SET, etc.
|
||||
if (!gstate_c.Supports(GPU_SUPPORTS_LOGIC_OP)) {
|
||||
@ -866,7 +866,7 @@ static void SimulateLogicOpIfNeeded(BlendFactor &srcBlend, BlendFactor &dstBlend
|
||||
srcBlend = BlendFactor::ZERO;
|
||||
dstBlend = BlendFactor::ZERO;
|
||||
blendEq = BlendEq::ADD;
|
||||
break;
|
||||
return true;
|
||||
case GE_LOGIC_AND:
|
||||
case GE_LOGIC_AND_REVERSE:
|
||||
WARN_LOG_REPORT_ONCE(d3dLogicOpAnd, G3D, "Unsupported AND logic op: %x", gstate.getLogicOp());
|
||||
@ -889,21 +889,23 @@ static void SimulateLogicOpIfNeeded(BlendFactor &srcBlend, BlendFactor &dstBlend
|
||||
dstBlend = BlendFactor::ONE;
|
||||
blendEq = BlendEq::SUBTRACT;
|
||||
WARN_LOG_REPORT_ONCE(d3dLogicOpInverted, G3D, "Attempted inverse for logic op: %x", gstate.getLogicOp());
|
||||
break;
|
||||
return true;
|
||||
case GE_LOGIC_NOOP:
|
||||
srcBlend = BlendFactor::ZERO;
|
||||
dstBlend = BlendFactor::ONE;
|
||||
blendEq = BlendEq::ADD;
|
||||
break;
|
||||
return true;
|
||||
case GE_LOGIC_XOR:
|
||||
WARN_LOG_REPORT_ONCE(d3dLogicOpOrXor, G3D, "Unsupported XOR logic op: %x", gstate.getLogicOp());
|
||||
break;
|
||||
case GE_LOGIC_OR:
|
||||
case GE_LOGIC_OR_INVERTED:
|
||||
// Inverted in shader.
|
||||
srcBlend = BlendFactor::ONE;
|
||||
dstBlend = BlendFactor::ONE;
|
||||
blendEq = BlendEq::ADD;
|
||||
WARN_LOG_REPORT_ONCE(d3dLogicOpOr, G3D, "Attempted or for logic op: %x", gstate.getLogicOp());
|
||||
break;
|
||||
return true;
|
||||
case GE_LOGIC_OR_REVERSE:
|
||||
WARN_LOG_REPORT_ONCE(d3dLogicOpOrReverse, G3D, "Unsupported OR REVERSE logic op: %x", gstate.getLogicOp());
|
||||
break;
|
||||
@ -912,10 +914,12 @@ static void SimulateLogicOpIfNeeded(BlendFactor &srcBlend, BlendFactor &dstBlend
|
||||
dstBlend = BlendFactor::ONE;
|
||||
blendEq = BlendEq::ADD;
|
||||
WARN_LOG_REPORT_ONCE(d3dLogicOpSet, G3D, "Attempted set for logic op: %x", gstate.getLogicOp());
|
||||
break;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Choose the shader part of the above logic op fallback simulation.
|
||||
@ -950,7 +954,6 @@ void ApplyStencilReplaceAndLogicOpIgnoreBlend(ReplaceAlphaType replaceAlphaWithS
|
||||
BlendFactor srcBlend = BlendFactor::ONE;
|
||||
BlendFactor dstBlend = BlendFactor::ZERO;
|
||||
BlendEq blendEq = BlendEq::ADD;
|
||||
SimulateLogicOpIfNeeded(srcBlend, dstBlend, blendEq);
|
||||
|
||||
// We're not blending, but we may still want to "blend" for stencil.
|
||||
// This is only useful for INCR/DECR/INVERT. Others can write directly.
|
||||
@ -1058,8 +1061,10 @@ static void ConvertBlendState(GenericBlendState &blendState, bool forceReplaceBl
|
||||
|
||||
ReplaceBlendType replaceBlend = ReplaceBlendWithShader(gstate_c.framebufFormat);
|
||||
if (forceReplaceBlend) {
|
||||
replaceBlend = REPLACE_BLEND_READ_FRAMEBUFFER;
|
||||
// Enforce blend replacement if enabled. If not, shouldn't do anything of course.
|
||||
replaceBlend = gstate.isAlphaBlendEnabled() ? REPLACE_BLEND_READ_FRAMEBUFFER : REPLACE_BLEND_NO;
|
||||
}
|
||||
|
||||
blendState.replaceBlend = replaceBlend;
|
||||
|
||||
blendState.simulateLogicOpType = SimulateLogicOpShaderTypeIfNeeded();
|
||||
@ -1250,11 +1255,6 @@ static void ConvertBlendState(GenericBlendState &blendState, bool forceReplaceBl
|
||||
colorEq = eqLookupNoMinMax[blendFuncEq];
|
||||
}
|
||||
|
||||
// Attempt to apply simulated logic ops, if any and if needed.
|
||||
if (!forceReplaceBlend) {
|
||||
SimulateLogicOpIfNeeded(glBlendFuncA, glBlendFuncB, colorEq);
|
||||
}
|
||||
|
||||
// The stencil-to-alpha in fragment shader doesn't apply here (blending is enabled), and we shouldn't
|
||||
// do any blending in the alpha channel as that doesn't seem to happen on PSP. So, we attempt to
|
||||
// apply the stencil to the alpha, since that's what should be stored.
|
||||
@ -1579,5 +1579,20 @@ void ComputedPipelineState::Convert(bool shaderBitOpsSuppported) {
|
||||
if (blendState.applyFramebufferRead || logicState.applyFramebufferRead) {
|
||||
maskState.ConvertToShaderBlend();
|
||||
logicState.ConvertToShaderBlend();
|
||||
} else {
|
||||
// If it isn't a read, we may need to change blending to apply the logic op.
|
||||
logicState.ApplyToBlendState(blendState);
|
||||
}
|
||||
}
|
||||
|
||||
void GenericLogicState::ApplyToBlendState(GenericBlendState &blendState) {
|
||||
if (SimulateLogicOpIfNeeded(blendState.srcColor, blendState.dstColor, blendState.eqColor)) {
|
||||
if (!blendState.blendEnabled) {
|
||||
// If it wasn't turned on, make sure it is now.
|
||||
blendState.blendEnabled = true;
|
||||
blendState.srcAlpha = BlendFactor::ONE;
|
||||
blendState.dstAlpha = BlendFactor::ZERO;
|
||||
blendState.eqAlpha = BlendEq::ADD;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -226,6 +226,7 @@ struct GenericLogicState {
|
||||
// Hardware and shader generation
|
||||
GELogicOp logicOp;
|
||||
|
||||
void ApplyToBlendState(GenericBlendState &blendState);
|
||||
void ConvertToShaderBlend() {
|
||||
if (logicOp != GE_LOGIC_COPY) {
|
||||
logicOpEnabled = false;
|
||||
@ -245,7 +246,9 @@ struct ComputedPipelineState {
|
||||
void Convert(bool shaderBitOpsSupported);
|
||||
|
||||
bool FramebufferRead() const {
|
||||
return blendState.applyFramebufferRead;
|
||||
// If blending is off, its applyFramebufferRead can be false even after state propagation.
|
||||
// So it's not enough to check just that one.
|
||||
return blendState.applyFramebufferRead || maskState.applyFramebufferRead || logicState.applyFramebufferRead;
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -1471,10 +1471,13 @@ inline u32 TfmtRawToFullAlpha(GETextureFormat fmt) {
|
||||
}
|
||||
}
|
||||
|
||||
CheckAlphaResult TextureCacheCommon::DecodeTextureLevel(u8 *out, int outPitch, GETextureFormat format, GEPaletteFormat clutformat, uint32_t texaddr, int level, int bufw, bool reverseColors, bool expandTo32bit) {
|
||||
CheckAlphaResult TextureCacheCommon::DecodeTextureLevel(u8 *out, int outPitch, GETextureFormat format, GEPaletteFormat clutformat, uint32_t texaddr, int level, int bufw, TexDecodeFlags flags) {
|
||||
u32 alphaSum = 0xFFFFFFFF;
|
||||
u32 fullAlphaMask = 0x0;
|
||||
|
||||
bool expandTo32bit = (flags & TexDecodeFlags::EXPAND32) != 0;
|
||||
bool reverseColors = (flags & TexDecodeFlags::REVERSE_COLORS) != 0;
|
||||
|
||||
bool swizzled = gstate.isTextureSwizzled();
|
||||
if ((texaddr & 0x00600000) != 0 && Memory::IsVRAMAddress(texaddr)) {
|
||||
// This means it's in a mirror, possibly a swizzled mirror. Let's report.
|
||||
@ -2459,7 +2462,7 @@ bool TextureCacheCommon::PrepareBuildTexture(BuildTexturePlan &plan, TexCacheEnt
|
||||
return true;
|
||||
}
|
||||
|
||||
void TextureCacheCommon::LoadTextureLevel(TexCacheEntry &entry, uint8_t *data, int stride, ReplacedTexture &replaced, int srcLevel, int scaleFactor, Draw::DataFormat dstFmt, bool reverseColors) {
|
||||
void TextureCacheCommon::LoadTextureLevel(TexCacheEntry &entry, uint8_t *data, int stride, ReplacedTexture &replaced, int srcLevel, int scaleFactor, Draw::DataFormat dstFmt, TexDecodeFlags texDecFlags) {
|
||||
int w = gstate.getTextureWidth(srcLevel);
|
||||
int h = gstate.getTextureHeight(srcLevel);
|
||||
|
||||
@ -2486,9 +2489,11 @@ void TextureCacheCommon::LoadTextureLevel(TexCacheEntry &entry, uint8_t *data, i
|
||||
decPitch = stride;
|
||||
}
|
||||
|
||||
bool expand32 = !gstate_c.Supports(GPU_SUPPORTS_16BIT_FORMATS) || dstFmt == Draw::DataFormat::R8G8B8A8_UNORM;
|
||||
if (!gstate_c.Supports(GPU_SUPPORTS_16BIT_FORMATS) || dstFmt == Draw::DataFormat::R8G8B8A8_UNORM) {
|
||||
texDecFlags |= TexDecodeFlags::EXPAND32;
|
||||
}
|
||||
|
||||
CheckAlphaResult alphaResult = DecodeTextureLevel((u8 *)pixelData, decPitch, tfmt, clutformat, texaddr, srcLevel, bufw, reverseColors, expand32);
|
||||
CheckAlphaResult alphaResult = DecodeTextureLevel((u8 *)pixelData, decPitch, tfmt, clutformat, texaddr, srcLevel, bufw, texDecFlags);
|
||||
entry.SetAlphaStatus(alphaResult, srcLevel);
|
||||
|
||||
if (scaleFactor > 1) {
|
||||
|
@ -50,6 +50,12 @@ struct VirtualFramebuffer;
|
||||
class TextureReplacer;
|
||||
class ShaderManagerCommon;
|
||||
|
||||
enum class TexDecodeFlags {
|
||||
EXPAND32 = 1,
|
||||
REVERSE_COLORS = 2,
|
||||
};
|
||||
ENUM_CLASS_BITOPS(TexDecodeFlags);
|
||||
|
||||
namespace Draw {
|
||||
class DrawContext;
|
||||
class Texture;
|
||||
@ -354,13 +360,13 @@ protected:
|
||||
|
||||
virtual void BindAsClutTexture(Draw::Texture *tex, bool smooth) {}
|
||||
|
||||
CheckAlphaResult DecodeTextureLevel(u8 *out, int outPitch, GETextureFormat format, GEPaletteFormat clutformat, uint32_t texaddr, int level, int bufw, bool reverseColors, bool expandTo32Bit);
|
||||
CheckAlphaResult DecodeTextureLevel(u8 *out, int outPitch, GETextureFormat format, GEPaletteFormat clutformat, uint32_t texaddr, int level, int bufw, TexDecodeFlags flags);
|
||||
void UnswizzleFromMem(u32 *dest, u32 destPitch, const u8 *texptr, u32 bufw, u32 height, u32 bytesPerPixel);
|
||||
CheckAlphaResult ReadIndexedTex(u8 *out, int outPitch, int level, const u8 *texptr, int bytesPerIndex, int bufw, bool reverseColors, bool expandTo32Bit);
|
||||
ReplacedTexture &FindReplacement(TexCacheEntry *entry, int &w, int &h, int &d);
|
||||
|
||||
// Return value is mapData normally, but could be another buffer allocated with AllocateAlignedMemory.
|
||||
void LoadTextureLevel(TexCacheEntry &entry, uint8_t *mapData, int mapRowPitch, ReplacedTexture &replaced, int srcLevel, int scaleFactor, Draw::DataFormat dstFmt, bool reverseColors);
|
||||
void LoadTextureLevel(TexCacheEntry &entry, uint8_t *mapData, int mapRowPitch, ReplacedTexture &replaced, int srcLevel, int scaleFactor, Draw::DataFormat dstFmt, TexDecodeFlags texDecFlags);
|
||||
|
||||
template <typename T>
|
||||
inline const T *GetCurrentClut() {
|
||||
|
@ -68,7 +68,7 @@ ClutTexture TextureShaderCache::GetClutTexture(GEPaletteFormat clutFormat, const
|
||||
ClutTexture *tex = new ClutTexture();
|
||||
|
||||
Draw::TextureDesc desc{};
|
||||
desc.width = maxClutEntries;
|
||||
desc.width = 512; // We always use 512-sized textures here for simplicity, though the most common is that only up to 256 entries are used.
|
||||
desc.height = 1;
|
||||
desc.depth = 1;
|
||||
desc.mipLevels = 1;
|
||||
|
@ -378,7 +378,7 @@ void TextureCacheD3D11::BuildTexture(TexCacheEntry *const entry) {
|
||||
return;
|
||||
}
|
||||
|
||||
LoadTextureLevel(*entry, data, stride, *plan.replaced, srcLevel, plan.scaleFactor, texFmt, false);
|
||||
LoadTextureLevel(*entry, data, stride, *plan.replaced, srcLevel, plan.scaleFactor, texFmt, TexDecodeFlags{});
|
||||
if (plan.depth == 1) {
|
||||
context_->UpdateSubresource(texture, i, nullptr, data, stride, 0);
|
||||
} else {
|
||||
|
@ -290,8 +290,6 @@ void TextureCacheDX9::BuildTexture(TexCacheEntry *const entry) {
|
||||
return;
|
||||
}
|
||||
|
||||
Draw::DataFormat texFmt = FromD3D9Format(dstFmt);
|
||||
|
||||
if (plan.depth == 1) {
|
||||
// Regular loop.
|
||||
for (int i = 0; i < levels; i++) {
|
||||
@ -307,7 +305,7 @@ void TextureCacheDX9::BuildTexture(TexCacheEntry *const entry) {
|
||||
}
|
||||
uint8_t *data = (uint8_t *)rect.pBits;
|
||||
int stride = rect.Pitch;
|
||||
LoadTextureLevel(*entry, data, stride, *plan.replaced, (i == 0) ? plan.baseLevelSrc : i, plan.scaleFactor, texFmt, false);
|
||||
LoadTextureLevel(*entry, data, stride, *plan.replaced, (i == 0) ? plan.baseLevelSrc : i, plan.scaleFactor, FromD3D9Format(dstFmt), TexDecodeFlags{});
|
||||
((LPDIRECT3DTEXTURE9)texture)->UnlockRect(dstLevel);
|
||||
}
|
||||
} else {
|
||||
@ -322,7 +320,7 @@ void TextureCacheDX9::BuildTexture(TexCacheEntry *const entry) {
|
||||
uint8_t *data = (uint8_t *)box.pBits;
|
||||
int stride = box.RowPitch;
|
||||
for (int i = 0; i < plan.depth; i++) {
|
||||
LoadTextureLevel(*entry, data, stride, *plan.replaced, (i == 0) ? plan.baseLevelSrc : i, plan.scaleFactor, texFmt, false);
|
||||
LoadTextureLevel(*entry, data, stride, *plan.replaced, (i == 0) ? plan.baseLevelSrc : i, plan.scaleFactor, FromD3D9Format(dstFmt), TexDecodeFlags{});
|
||||
data += box.SlicePitch;
|
||||
}
|
||||
((LPDIRECT3DVOLUMETEXTURE9)texture)->UnlockBox(0);
|
||||
|
@ -325,7 +325,7 @@ void TextureCacheGLES::BuildTexture(TexCacheEntry *const entry) {
|
||||
return;
|
||||
}
|
||||
|
||||
LoadTextureLevel(*entry, data, stride, *plan.replaced, srcLevel, plan.scaleFactor, dstFmt, true);
|
||||
LoadTextureLevel(*entry, data, stride, *plan.replaced, srcLevel, plan.scaleFactor, dstFmt, TexDecodeFlags::REVERSE_COLORS);
|
||||
|
||||
// NOTE: TextureImage takes ownership of data, so we don't free it afterwards.
|
||||
render_->TextureImage(entry->textureName, i, mipWidth, mipHeight, 1, dstFmt, data, GLRAllocType::ALIGNED);
|
||||
@ -344,7 +344,7 @@ void TextureCacheGLES::BuildTexture(TexCacheEntry *const entry) {
|
||||
u8 *p = data;
|
||||
|
||||
for (int i = 0; i < plan.depth; i++) {
|
||||
LoadTextureLevel(*entry, p, stride, *plan.replaced, i, plan.scaleFactor, dstFmt, true);
|
||||
LoadTextureLevel(*entry, p, stride, *plan.replaced, i, plan.scaleFactor, dstFmt, TexDecodeFlags::REVERSE_COLORS);
|
||||
p += levelStride;
|
||||
}
|
||||
|
||||
|
27
GPU/Math3D.h
27
GPU/Math3D.h
@ -57,22 +57,10 @@ inline static T VecClamp(const T &v, const T &low, const T &high)
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
class Vec2
|
||||
{
|
||||
class Vec2 {
|
||||
public:
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
T x,y;
|
||||
};
|
||||
#if defined(_M_SSE)
|
||||
__m128i ivec;
|
||||
__m128 vec;
|
||||
#elif PPSSPP_ARCH(ARM64_NEON)
|
||||
int32x4_t ivec;
|
||||
float32x4_t vec;
|
||||
#endif
|
||||
struct {
|
||||
T x,y;
|
||||
};
|
||||
|
||||
T* AsArray() { return &x; }
|
||||
@ -81,15 +69,6 @@ public:
|
||||
Vec2() {}
|
||||
Vec2(const T a[2]) : x(a[0]), y(a[1]) {}
|
||||
Vec2(const T& _x, const T& _y) : x(_x), y(_y) {}
|
||||
#if defined(_M_SSE)
|
||||
Vec2(const __m128 &_vec) : vec(_vec) {}
|
||||
Vec2(const __m128i &_ivec) : ivec(_ivec) {}
|
||||
#elif PPSSPP_ARCH(ARM64_NEON)
|
||||
Vec2(const float32x4_t &_vec) : vec(_vec) {}
|
||||
#if !defined(_MSC_VER)
|
||||
Vec2(const int32x4_t &_ivec) : ivec(_ivec) {}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
template<typename T2>
|
||||
Vec2<T2> Cast() const
|
||||
|
@ -242,7 +242,7 @@ bool BinManager::HasTextureWrite(const RasterizerState &state) {
|
||||
if (!state.enableTextures)
|
||||
return false;
|
||||
|
||||
const int textureBits = textureBitsPerPixel[state.samplerID.texfmt];
|
||||
const uint8_t textureBits = textureBitsPerPixel[state.samplerID.texfmt];
|
||||
for (int i = 0; i <= state.maxTexLevel; ++i) {
|
||||
int byteStride = (state.texbufw[i] * textureBits) / 8;
|
||||
int byteWidth = (state.samplerID.cached.sizes[i].w * textureBits) / 8;
|
||||
|
@ -78,14 +78,6 @@ static inline Vec3<int> Interpolate(const Vec3<int> &c0, const Vec3<int> &c1, co
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline Vec2<float> Interpolate(const Vec2<float> &c0, const Vec2<float> &c1, const Vec2<float> &c2, int w0, int w1, int w2, float wsum) {
|
||||
#if defined(_M_SSE) && !PPSSPP_ARCH(X86)
|
||||
return Vec2<float>(Interpolate(c0.vec, c1.vec, c2.vec, w0, w1, w2, wsum));
|
||||
#else
|
||||
return (c0 * w0 + c1 * w1 + c2 * w2) * wsum;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline Vec4<float> Interpolate(const float &c0, const float &c1, const float &c2, const Vec4<float> &w0, const Vec4<float> &w1, const Vec4<float> &w2, const Vec4<float> &wsum_recip) {
|
||||
#if defined(_M_SSE) && !PPSSPP_ARCH(X86)
|
||||
__m128 v = _mm_mul_ps(w0.vec, _mm_set1_ps(c0));
|
||||
@ -124,7 +116,7 @@ void ComputeRasterizerState(RasterizerState *state, bool throughMode) {
|
||||
for (uint8_t i = 0; i <= state->maxTexLevel; i++) {
|
||||
u32 texaddr = gstate.getTextureAddress(i);
|
||||
state->texaddr[i] = texaddr;
|
||||
state->texbufw[i] = GetTextureBufw(i, texaddr, texfmt);
|
||||
state->texbufw[i] = (uint16_t)GetTextureBufw(i, texaddr, texfmt);
|
||||
if (Memory::IsValidAddress(texaddr))
|
||||
state->texptr[i] = Memory::GetPointerUnchecked(texaddr);
|
||||
else
|
||||
@ -143,9 +135,6 @@ void ComputeRasterizerState(RasterizerState *state, bool throughMode) {
|
||||
state->throughMode = throughMode;
|
||||
state->antialiasLines = gstate.isAntiAliasEnabled();
|
||||
|
||||
state->screenOffsetX = gstate.getOffsetX16();
|
||||
state->screenOffsetY = gstate.getOffsetY16();
|
||||
|
||||
#if defined(SOFTGPU_MEMORY_TAGGING_DETAILED) || defined(SOFTGPU_MEMORY_TAGGING_BASIC)
|
||||
DisplayList currentList{};
|
||||
if (gpuDebug)
|
||||
@ -421,7 +410,7 @@ Vec3<int> AlphaBlendingResult(const PixelFuncID &pixelID, const Vec4<int> &sourc
|
||||
|
||||
static inline Vec4IntResult SOFTRAST_CALL ApplyTexturing(float s, float t, int x, int y, Vec4IntArg prim_color, int texlevel, int frac_texlevel, bool bilinear, const RasterizerState &state) {
|
||||
const u8 **tptr0 = const_cast<const u8 **>(&state.texptr[texlevel]);
|
||||
const int *bufw0 = &state.texbufw[texlevel];
|
||||
const uint16_t *bufw0 = &state.texbufw[texlevel];
|
||||
|
||||
if (!bilinear) {
|
||||
return state.nearest(s, t, x, y, prim_color, tptr0, bufw0, texlevel, frac_texlevel, state.samplerID);
|
||||
@ -1476,7 +1465,7 @@ bool GetCurrentTexture(GPUDebugBuffer &buffer, int level)
|
||||
|
||||
GETextureFormat texfmt = gstate.getTextureFormat();
|
||||
u32 texaddr = gstate.getTextureAddress(level);
|
||||
int texbufw = GetTextureBufw(level, texaddr, texfmt);
|
||||
u32 texbufw = GetTextureBufw(level, texaddr, texfmt);
|
||||
int w = gstate.getTextureWidth(level);
|
||||
int h = gstate.getTextureHeight(level);
|
||||
|
||||
|
@ -39,11 +39,9 @@ struct RasterizerState {
|
||||
Sampler::LinearFunc linear;
|
||||
Sampler::NearestFunc nearest;
|
||||
uint32_t texaddr[8]{};
|
||||
int texbufw[8]{};
|
||||
uint16_t texbufw[8]{};
|
||||
const u8 *texptr[8]{};
|
||||
float textureLodSlope;
|
||||
int screenOffsetX;
|
||||
int screenOffsetY;
|
||||
|
||||
struct {
|
||||
uint8_t maxTexLevel : 3;
|
||||
|
@ -103,7 +103,7 @@ void DrawSprite(const VertexData &v0, const VertexData &v1, const BinCoords &ran
|
||||
const u8 *texptr = state.texptr[0];
|
||||
|
||||
GETextureFormat texfmt = state.samplerID.TexFmt();
|
||||
int texbufw = state.texbufw[0];
|
||||
uint16_t texbufw = state.texbufw[0];
|
||||
|
||||
Sampler::FetchFunc fetchFunc = Sampler::GetFetchFunc(state.samplerID);
|
||||
auto &pixelID = state.pixelID;
|
||||
@ -300,9 +300,9 @@ bool RectangleFastPath(const VertexData &v0, const VertexData &v1, BinManager &b
|
||||
if (PSP_CoreParameter().compat.flags().DarkStalkersPresentHack && v0.texturecoords.x == 64.0f && v0.texturecoords.y == 16.0f && v1.texturecoords.x == 448.0f && v1.texturecoords.y == 240.0f) {
|
||||
// check for save/load dialog.
|
||||
if (!currentDialogActive) {
|
||||
if (v0.screenpos.x + state.screenOffsetX == 0x7100 && v0.screenpos.y + state.screenOffsetY == 0x7780 && v1.screenpos.x + state.screenOffsetX == 0x8f00 && v1.screenpos.y + state.screenOffsetY == 0x8880) {
|
||||
if (v0.screenpos.x + gstate.getOffsetX16() == 0x7100 && v0.screenpos.y + gstate.getOffsetY16() == 0x7780 && v1.screenpos.x + gstate.getOffsetX16() == 0x8f00 && v1.screenpos.y + gstate.getOffsetY16() == 0x8880) {
|
||||
g_DarkStalkerStretch = DSStretch::Wide;
|
||||
} else if (v0.screenpos.x + state.screenOffsetX == 0x7400 && v0.screenpos.y + state.screenOffsetY == 0x7780 && v1.screenpos.x + state.screenOffsetX == 0x8C00 && v1.screenpos.y + state.screenOffsetY == 0x8880) {
|
||||
} else if (v0.screenpos.x + gstate.getOffsetX16() == 0x7400 && v0.screenpos.y + gstate.getOffsetY16() == 0x7780 && v1.screenpos.x + gstate.getOffsetX16() == 0x8C00 && v1.screenpos.y + gstate.getOffsetY16() == 0x8880) {
|
||||
g_DarkStalkerStretch = DSStretch::Normal;
|
||||
} else {
|
||||
return false;
|
||||
|
@ -38,8 +38,8 @@ using namespace Rasterizer;
|
||||
|
||||
namespace Sampler {
|
||||
|
||||
static Vec4IntResult SOFTRAST_CALL SampleNearest(float s, float t, int x, int y, Vec4IntArg prim_color, const u8 *const *tptr, const int *bufw, int level, int levelFrac, const SamplerID &samplerID);
|
||||
static Vec4IntResult SOFTRAST_CALL SampleLinear(float s, float t, int x, int y, Vec4IntArg prim_color, const u8 *const *tptr, const int *bufw, int level, int levelFrac, const SamplerID &samplerID);
|
||||
static Vec4IntResult SOFTRAST_CALL SampleNearest(float s, float t, int x, int y, Vec4IntArg prim_color, const u8 *const *tptr, const uint16_t *bufw, int level, int levelFrac, const SamplerID &samplerID);
|
||||
static Vec4IntResult SOFTRAST_CALL SampleLinear(float s, float t, int x, int y, Vec4IntArg prim_color, const u8 *const *tptr, const uint16_t *bufw, int level, int levelFrac, const SamplerID &samplerID);
|
||||
static Vec4IntResult SOFTRAST_CALL SampleFetch(int u, int v, const u8 *tptr, int bufw, int level, const SamplerID &samplerID);
|
||||
|
||||
std::mutex jitCacheLock;
|
||||
@ -281,7 +281,7 @@ struct Nearest4 {
|
||||
};
|
||||
|
||||
template <int N>
|
||||
inline static Nearest4 SOFTRAST_CALL SampleNearest(const int u[N], const int v[N], const u8 *srcptr, int texbufw, int level, const SamplerID &samplerID) {
|
||||
inline static Nearest4 SOFTRAST_CALL SampleNearest(const int u[N], const int v[N], const u8 *srcptr, uint16_t texbufw, int level, const SamplerID &samplerID) {
|
||||
Nearest4 res;
|
||||
if (!srcptr) {
|
||||
memset(res.v, 0, sizeof(res.v));
|
||||
@ -535,7 +535,7 @@ Vec4IntResult SOFTRAST_CALL GetTextureFunctionOutput(Vec4IntArg prim_color_in, V
|
||||
return ToVec4IntResult(Vec4<int>(out_rgb, out_a));
|
||||
}
|
||||
|
||||
static Vec4IntResult SOFTRAST_CALL SampleNearest(float s, float t, int x, int y, Vec4IntArg prim_color, const u8 *const *tptr, const int *bufw, int level, int levelFrac, const SamplerID &samplerID) {
|
||||
static Vec4IntResult SOFTRAST_CALL SampleNearest(float s, float t, int x, int y, Vec4IntArg prim_color, const u8 *const *tptr, const uint16_t *bufw, int level, int levelFrac, const SamplerID &samplerID) {
|
||||
int u, v;
|
||||
|
||||
// Nearest filtering only. Round texcoords.
|
||||
@ -631,7 +631,7 @@ static inline Vec4IntResult SOFTRAST_CALL GetTexelCoordinatesQuadT(int level, fl
|
||||
return ApplyTexelClampQuadT(samplerID.clampT, base_v, height);
|
||||
}
|
||||
|
||||
static Vec4IntResult SOFTRAST_CALL SampleLinearLevel(float s, float t, int x, int y, const u8 *const *tptr, const int *bufw, int texlevel, const SamplerID &samplerID) {
|
||||
static Vec4IntResult SOFTRAST_CALL SampleLinearLevel(float s, float t, int x, int y, const u8 *const *tptr, const uint16_t *bufw, int texlevel, const SamplerID &samplerID) {
|
||||
int frac_u, frac_v;
|
||||
const Vec4<int> u = GetTexelCoordinatesQuadS(texlevel, s, frac_u, x, samplerID);
|
||||
const Vec4<int> v = GetTexelCoordinatesQuadT(texlevel, t, frac_v, y, samplerID);
|
||||
@ -646,7 +646,7 @@ static Vec4IntResult SOFTRAST_CALL SampleLinearLevel(float s, float t, int x, in
|
||||
return ToVec4IntResult((top * (0x10 - frac_v) + bot * frac_v) / (16 * 16));
|
||||
}
|
||||
|
||||
static Vec4IntResult SOFTRAST_CALL SampleLinear(float s, float t, int x, int y, Vec4IntArg prim_color, const u8 *const *tptr, const int *bufw, int texlevel, int levelFrac, const SamplerID &samplerID) {
|
||||
static Vec4IntResult SOFTRAST_CALL SampleLinear(float s, float t, int x, int y, Vec4IntArg prim_color, const u8 *const *tptr, const uint16_t *bufw, int texlevel, int levelFrac, const SamplerID &samplerID) {
|
||||
Vec4<int> c0 = SampleLinearLevel(s, t, x, y, tptr, bufw, texlevel, samplerID);
|
||||
if (levelFrac) {
|
||||
const Vec4<int> c1 = SampleLinearLevel(s, t, x, y, tptr + 1, bufw + 1, texlevel + 1, samplerID);
|
||||
|
@ -36,10 +36,10 @@ namespace Sampler {
|
||||
typedef Rasterizer::Vec4IntResult(SOFTRAST_CALL *FetchFunc)(int u, int v, const u8 *tptr, int bufw, int level, const SamplerID &samplerID);
|
||||
FetchFunc GetFetchFunc(SamplerID id);
|
||||
|
||||
typedef Rasterizer::Vec4IntResult (SOFTRAST_CALL *NearestFunc)(float s, float t, int x, int y, Rasterizer::Vec4IntArg prim_color, const u8 *const *tptr, const int *bufw, int level, int levelFrac, const SamplerID &samplerID);
|
||||
typedef Rasterizer::Vec4IntResult (SOFTRAST_CALL *NearestFunc)(float s, float t, int x, int y, Rasterizer::Vec4IntArg prim_color, const u8 *const *tptr, const uint16_t *bufw, int level, int levelFrac, const SamplerID &samplerID);
|
||||
NearestFunc GetNearestFunc(SamplerID id);
|
||||
|
||||
typedef Rasterizer::Vec4IntResult (SOFTRAST_CALL *LinearFunc)(float s, float t, int x, int y, Rasterizer::Vec4IntArg prim_color, const u8 *const *tptr, const int *bufw, int level, int levelFrac, const SamplerID &samplerID);
|
||||
typedef Rasterizer::Vec4IntResult (SOFTRAST_CALL *LinearFunc)(float s, float t, int x, int y, Rasterizer::Vec4IntArg prim_color, const u8 *const *tptr, const uint16_t *bufw, int level, int levelFrac, const SamplerID &samplerID);
|
||||
LinearFunc GetLinearFunc(SamplerID id);
|
||||
|
||||
void Init();
|
||||
|
@ -246,7 +246,7 @@ NearestFunc SamplerJitCache::CompileNearest(const SamplerID &id) {
|
||||
auto loadPtrs = [&](bool level1) {
|
||||
X64Reg bufwReg = regCache_.Alloc(RegCache::GEN_ARG_BUFW);
|
||||
X64Reg bufwPtrReg = regCache_.Find(RegCache::GEN_ARG_BUFW_PTR);
|
||||
MOV(32, R(bufwReg), MDisp(bufwPtrReg, level1 ? 4 : 0));
|
||||
MOVZX(32, 16, bufwReg, MDisp(bufwPtrReg, level1 ? 2 : 0));
|
||||
regCache_.Unlock(bufwPtrReg, RegCache::GEN_ARG_BUFW_PTR);
|
||||
regCache_.Unlock(bufwReg, RegCache::GEN_ARG_BUFW);
|
||||
regCache_.ForceRetain(RegCache::GEN_ARG_BUFW);
|
||||
@ -713,7 +713,7 @@ LinearFunc SamplerJitCache::CompileLinear(const SamplerID &id) {
|
||||
X64Reg srcReg = regCache_.Find(RegCache::GEN_ARG_TEXPTR_PTR);
|
||||
X64Reg bufwReg = regCache_.Find(RegCache::GEN_ARG_BUFW_PTR);
|
||||
ADD(64, R(srcArgReg), MDisp(srcReg, level1 ? 8 : 0));
|
||||
MOV(32, R(bufwArgReg), MDisp(bufwReg, level1 ? 4 : 0));
|
||||
MOVZX(32, 16, bufwArgReg, MDisp(bufwReg, level1 ? 2 : 0));
|
||||
// Leave level/levelFrac, we just always load from RAM on Windows and lock on POSIX.
|
||||
regCache_.Unlock(srcReg, RegCache::GEN_ARG_TEXPTR_PTR);
|
||||
regCache_.Unlock(bufwReg, RegCache::GEN_ARG_BUFW_PTR);
|
||||
@ -2995,12 +2995,13 @@ bool SamplerJitCache::Jit_PrepareDataDirectOffsets(const SamplerID &id, RegCache
|
||||
if (!id.useStandardBufw || id.hasAnyMips) {
|
||||
// Spread bufw into each lane.
|
||||
X64Reg bufwReg = regCache_.Find(RegCache::GEN_ARG_BUFW_PTR);
|
||||
if (cpu_info.bAVX2) {
|
||||
VPBROADCASTD(128, bufwVecReg, MDisp(bufwReg, level1 ? 4 : 0));
|
||||
if (cpu_info.bSSE4_1) {
|
||||
PMOVZXWD(bufwVecReg, MDisp(bufwReg, level1 ? 2 : 0));
|
||||
} else {
|
||||
MOVD_xmm(bufwVecReg, MDisp(bufwReg, level1 ? 4 : 0));
|
||||
PSHUFD(bufwVecReg, R(bufwVecReg), _MM_SHUFFLE(0, 0, 0, 0));
|
||||
PXOR(bufwVecReg, R(bufwVecReg));
|
||||
PINSRW(bufwVecReg, MDisp(bufwReg, level1 ? 2 : 0), 0);
|
||||
}
|
||||
PSHUFD(bufwVecReg, R(bufwVecReg), _MM_SHUFFLE(0, 0, 0, 0));
|
||||
regCache_.Unlock(bufwReg, RegCache::GEN_ARG_BUFW_PTR);
|
||||
|
||||
if (bitsPerTexel == 4)
|
||||
@ -3070,12 +3071,13 @@ bool SamplerJitCache::Jit_PrepareDataSwizzledOffsets(const SamplerID &id, RegCac
|
||||
if (!id.useStandardBufw || id.hasAnyMips) {
|
||||
// Spread bufw into each lane.
|
||||
X64Reg bufwReg = regCache_.Find(RegCache::GEN_ARG_BUFW_PTR);
|
||||
if (cpu_info.bAVX2) {
|
||||
VPBROADCASTD(128, bufwVecReg, MDisp(bufwReg, level1 ? 4 : 0));
|
||||
if (cpu_info.bSSE4_1) {
|
||||
PMOVZXWD(bufwVecReg, MDisp(bufwReg, level1 ? 2 : 0));
|
||||
} else {
|
||||
MOVD_xmm(bufwVecReg, MDisp(bufwReg, level1 ? 4 : 0));
|
||||
PSHUFD(bufwVecReg, R(bufwVecReg), _MM_SHUFFLE(0, 0, 0, 0));
|
||||
PXOR(bufwVecReg, R(bufwVecReg));
|
||||
PINSRW(bufwVecReg, MDisp(bufwReg, level1 ? 2 : 0), 0);
|
||||
}
|
||||
PSHUFD(bufwVecReg, R(bufwVecReg), _MM_SHUFFLE(0, 0, 0, 0));
|
||||
regCache_.Unlock(bufwReg, RegCache::GEN_ARG_BUFW_PTR);
|
||||
}
|
||||
|
||||
@ -3162,12 +3164,13 @@ bool SamplerJitCache::Jit_PrepareDataDXTOffsets(const SamplerID &id, Rasterizer:
|
||||
if (!id.useStandardBufw || id.hasAnyMips) {
|
||||
// Spread bufw into each lane.
|
||||
X64Reg bufwReg = regCache_.Find(RegCache::GEN_ARG_BUFW_PTR);
|
||||
if (cpu_info.bAVX2) {
|
||||
VPBROADCASTD(128, bufwVecReg, MDisp(bufwReg, level1 ? 4 : 0));
|
||||
if (cpu_info.bSSE4_1) {
|
||||
PMOVZXWD(bufwVecReg, MDisp(bufwReg, level1 ? 2 : 0));
|
||||
} else {
|
||||
MOVD_xmm(bufwVecReg, MDisp(bufwReg, level1 ? 4 : 0));
|
||||
PSHUFD(bufwVecReg, R(bufwVecReg), _MM_SHUFFLE(0, 0, 0, 0));
|
||||
PXOR(bufwVecReg, R(bufwVecReg));
|
||||
PINSRW(bufwVecReg, MDisp(bufwReg, level1 ? 2 : 0), 0);
|
||||
}
|
||||
PSHUFD(bufwVecReg, R(bufwVecReg), _MM_SHUFFLE(0, 0, 0, 0));
|
||||
regCache_.Unlock(bufwReg, RegCache::GEN_ARG_BUFW_PTR);
|
||||
|
||||
// Divide by 4 before the multiply.
|
||||
|
@ -164,6 +164,15 @@ public:
|
||||
|
||||
TransformUnit transformUnit;
|
||||
|
||||
#if PPSSPP_ARCH(32BIT)
|
||||
void *operator new(size_t s) {
|
||||
return AllocateAlignedMemory(s, 16);
|
||||
}
|
||||
void operator delete(void *p) {
|
||||
FreeAlignedMemory(p);
|
||||
}
|
||||
#endif
|
||||
|
||||
protected:
|
||||
bool UpdateUseHWTessellation(bool enable) override { return false; }
|
||||
};
|
||||
|
@ -44,7 +44,8 @@
|
||||
|
||||
// Most drivers treat vkCreateShaderModule as pretty much a memcpy. What actually
|
||||
// takes time here, and makes this worthy of parallelization, is GLSLtoSPV.
|
||||
Promise<VkShaderModule> *CompileShaderModuleAsync(VulkanContext *vulkan, VkShaderStageFlagBits stage, const char *code) {
|
||||
// Takes ownership over tag.
|
||||
static Promise<VkShaderModule> *CompileShaderModuleAsync(VulkanContext *vulkan, VkShaderStageFlagBits stage, const char *code, std::string *tag) {
|
||||
auto compile = [=] {
|
||||
PROFILE_THIS_SCOPE("shadercomp");
|
||||
|
||||
@ -59,12 +60,13 @@ Promise<VkShaderModule> *CompileShaderModuleAsync(VulkanContext *vulkan, VkShade
|
||||
} else {
|
||||
ERROR_LOG(G3D, "Error in shader compilation!");
|
||||
}
|
||||
std::string numberedSource = LineNumberString(code);
|
||||
ERROR_LOG(G3D, "Messages: %s", errorMessage.c_str());
|
||||
ERROR_LOG(G3D, "Shader source:\n%s", code);
|
||||
#ifdef SHADERLOG
|
||||
OutputDebugStringA(LineNumberString(code).c_str());
|
||||
ERROR_LOG(G3D, "Shader source:\n%s", numberedSource.c_str());
|
||||
#if PPSSPP_PLATFORM(WINDOWS)
|
||||
OutputDebugStringA("Error messages:\n");
|
||||
OutputDebugStringA(errorMessage.c_str());
|
||||
OutputDebugStringA(numberedSource.c_str());
|
||||
#endif
|
||||
Reporting::ReportMessage("Vulkan error in shader compilation: info: %s / code: %s", errorMessage.c_str(), code);
|
||||
}
|
||||
@ -75,6 +77,10 @@ Promise<VkShaderModule> *CompileShaderModuleAsync(VulkanContext *vulkan, VkShade
|
||||
#ifdef SHADERLOG
|
||||
OutputDebugStringA("OK");
|
||||
#endif
|
||||
if (tag) {
|
||||
vulkan->SetDebugName(shaderModule, VK_OBJECT_TYPE_SHADER_MODULE, tag->c_str());
|
||||
delete tag;
|
||||
}
|
||||
}
|
||||
|
||||
return shaderModule;
|
||||
@ -92,7 +98,7 @@ Promise<VkShaderModule> *CompileShaderModuleAsync(VulkanContext *vulkan, VkShade
|
||||
VulkanFragmentShader::VulkanFragmentShader(VulkanContext *vulkan, FShaderID id, FragmentShaderFlags flags, const char *code)
|
||||
: vulkan_(vulkan), id_(id), flags_(flags) {
|
||||
source_ = code;
|
||||
module_ = CompileShaderModuleAsync(vulkan, VK_SHADER_STAGE_FRAGMENT_BIT, source_.c_str());
|
||||
module_ = CompileShaderModuleAsync(vulkan, VK_SHADER_STAGE_FRAGMENT_BIT, source_.c_str(), new std::string(FragmentShaderDesc(id)));
|
||||
if (!module_) {
|
||||
failed_ = true;
|
||||
} else {
|
||||
@ -122,7 +128,7 @@ std::string VulkanFragmentShader::GetShaderString(DebugShaderStringType type) co
|
||||
VulkanVertexShader::VulkanVertexShader(VulkanContext *vulkan, VShaderID id, const char *code, bool useHWTransform)
|
||||
: vulkan_(vulkan), useHWTransform_(useHWTransform), id_(id) {
|
||||
source_ = code;
|
||||
module_ = CompileShaderModuleAsync(vulkan, VK_SHADER_STAGE_VERTEX_BIT, source_.c_str());
|
||||
module_ = CompileShaderModuleAsync(vulkan, VK_SHADER_STAGE_VERTEX_BIT, source_.c_str(), new std::string(VertexShaderDesc(id).c_str()));
|
||||
if (!module_) {
|
||||
failed_ = true;
|
||||
} else {
|
||||
|
@ -728,7 +728,10 @@ void TextureCacheVulkan::LoadTextureLevel(TexCacheEntry &entry, uint8_t *writePt
|
||||
u32 *pixelData;
|
||||
int decPitch;
|
||||
|
||||
bool expand32 = !gstate_c.Supports(GPU_SUPPORTS_16BIT_FORMATS) || scaleFactor > 1 || dstFmt == VULKAN_8888_FORMAT;
|
||||
TexDecodeFlags texDecFlags{};
|
||||
if (!gstate_c.Supports(GPU_SUPPORTS_16BIT_FORMATS) || scaleFactor > 1 || dstFmt == VULKAN_8888_FORMAT) {
|
||||
texDecFlags |= TexDecodeFlags::EXPAND32;
|
||||
}
|
||||
|
||||
if (scaleFactor > 1) {
|
||||
tmpTexBufRearrange_.resize(std::max(bufw, w) * h);
|
||||
@ -740,7 +743,7 @@ void TextureCacheVulkan::LoadTextureLevel(TexCacheEntry &entry, uint8_t *writePt
|
||||
decPitch = rowPitch;
|
||||
}
|
||||
|
||||
CheckAlphaResult alphaResult = DecodeTextureLevel((u8 *)pixelData, decPitch, tfmt, clutformat, texaddr, level, bufw, false, expand32);
|
||||
CheckAlphaResult alphaResult = DecodeTextureLevel((u8 *)pixelData, decPitch, tfmt, clutformat, texaddr, level, bufw, texDecFlags);
|
||||
entry.SetAlphaStatus(alphaResult, level);
|
||||
|
||||
if (scaleFactor > 1) {
|
||||
|
@ -374,8 +374,7 @@ enum GEMatrixType {
|
||||
GE_MTX_TEXGEN,
|
||||
};
|
||||
|
||||
enum GEComparison
|
||||
{
|
||||
enum GEComparison : uint8_t {
|
||||
GE_COMP_NEVER = 0,
|
||||
GE_COMP_ALWAYS = 1,
|
||||
GE_COMP_EQUAL = 2,
|
||||
@ -578,8 +577,7 @@ enum GEPrimitiveType
|
||||
GE_PRIM_INVALID = -1,
|
||||
};
|
||||
|
||||
enum GELogicOp
|
||||
{
|
||||
enum GELogicOp : uint8_t {
|
||||
GE_LOGIC_CLEAR = 0,
|
||||
GE_LOGIC_AND = 1,
|
||||
GE_LOGIC_AND_REVERSE = 2,
|
||||
|
@ -251,7 +251,7 @@ private:
|
||||
return nullptr;
|
||||
}
|
||||
const auto recentIsos = g_Config.RecentIsos();
|
||||
if (index >= recentIsos.size())
|
||||
if (index >= (int)recentIsos.size())
|
||||
return nullptr;
|
||||
return g_gameInfoCache->GetInfo(dc.GetDrawContext(), Path(recentIsos[index]), GAMEINFO_WANTBG);
|
||||
}
|
||||
|
@ -48,7 +48,7 @@ static bool TestSamplerJit() {
|
||||
bool header = false;
|
||||
|
||||
u8 **tptr = new u8 *[8];
|
||||
int *bufw = new int[8];
|
||||
uint16_t *bufw = new uint16_t[8];
|
||||
u8 *clut = new u8[1024];
|
||||
memset(clut, 0, 1024);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user