Merge pull request #8731 from Pokechu22/dsp-lle-cpu-int-crash

Fix crash when using DSP LLE with CPU interpreter (or fastmem off)
This commit is contained in:
Tilka 2020-08-01 15:26:27 +01:00 committed by GitHub
commit dbacffd75d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 49 additions and 90 deletions

View File

@ -95,7 +95,7 @@ u32 HashAdler32(const u8* data, size_t len)
// Stupid hash - but can't go back now :)
// Don't use for new things. At least it's reasonably fast.
u32 HashEctor(const u8* ptr, int length)
u32 HashEctor(const u8* ptr, size_t length)
{
u32 crc = 0;

View File

@ -12,7 +12,7 @@ namespace Common
{
u32 HashFletcher(const u8* data_u8, size_t length); // FAST. Length & 1 == 0.
u32 HashAdler32(const u8* data, size_t len); // Fairly accurate, slightly slower
u32 HashEctor(const u8* ptr, int length); // JUNK. DO NOT USE FOR NEW THINGS
u32 HashEctor(const u8* ptr, size_t length); // JUNK. DO NOT USE FOR NEW THINGS
u64 GetHash64(const u8* src, u32 len, u32 samples);
void SetHash64Function();
} // namespace Common

View File

@ -68,11 +68,11 @@ bool Compare(const std::vector<u16>& code1, const std::vector<u16>& code2)
if (code1.size() != code2.size())
printf("Size difference! 1=%zu 2=%zu\n", code1.size(), code2.size());
u32 count_equal = 0;
const int min_size = std::min<int>((int)code1.size(), (int)code2.size());
const u16 min_size = static_cast<u16>(std::min(code1.size(), code2.size()));
AssemblerSettings settings;
DSPDisassembler disassembler(settings);
for (int i = 0; i < min_size; i++)
for (u16 i = 0; i < min_size; i++)
{
if (code1[i] == code2[i])
{
@ -93,7 +93,7 @@ bool Compare(const std::vector<u16>& code1, const std::vector<u16>& code2)
{
printf("Extra code words:\n");
const std::vector<u16>& longest = code1.size() > code2.size() ? code1 : code2;
for (int i = min_size; i < (int)longest.size(); i++)
for (u16 i = min_size; i < longest.size(); i++)
{
u16 pc = i;
std::string line;
@ -146,7 +146,7 @@ bool SaveBinary(const std::vector<u16>& code, const std::string& filename)
return File::WriteStringToFile(filename, buffer);
}
bool DumpDSPCode(const u8* code_be, int size_in_bytes, u32 crc)
bool DumpDSPCode(const u8* code_be, size_t size_in_bytes, u32 crc)
{
const std::string root_name =
File::GetUserPath(D_DUMPDSP_IDX) + fmt::format("DSP_UC_{:08X}", crc);

View File

@ -24,5 +24,5 @@ std::vector<u16> BinaryStringBEToCode(const std::string& str);
std::optional<std::vector<u16>> LoadBinary(const std::string& filename);
bool SaveBinary(const std::vector<u16>& code, const std::string& filename);
bool DumpDSPCode(const u8* code_be, int size_in_bytes, u32 crc);
bool DumpDSPCode(const u8* code_be, size_t size_in_bytes, u32 crc);
} // namespace DSP

View File

@ -306,9 +306,6 @@ struct SDSP
u16* dram;
u16* irom;
u16* coef;
// This one doesn't really belong here.
u8* cpu_ram;
};
extern SDSP g_dsp;

View File

@ -11,7 +11,6 @@
#include "Common/CPUDetect.h"
#include "Common/CommonTypes.h"
#include "Common/Hash.h"
#include "Common/Intrinsics.h"
#include "Common/Logging/Log.h"
#include "Common/MemoryUtil.h"
@ -283,22 +282,15 @@ u16 gdsp_ifx_read(u16 addr)
static const u8* gdsp_idma_in(u16 dsp_addr, u32 addr, u32 size)
{
u16* dst = g_dsp.iram + (dsp_addr / 2);
const u8* code = &g_dsp.cpu_ram[addr & 0x0fffffff];
g_dsp.iram_crc = Common::HashEctor(code, size);
Common::UnWriteProtectMemory(g_dsp.iram, DSP_IRAM_BYTE_SIZE, false);
memcpy(dst, code, size);
for (size_t i = 0; i < size / 2; i++)
dst[i] = Common::swap16(dst[i]);
Host::DMAToDSP(g_dsp.iram + dsp_addr / 2, addr, size);
Common::WriteProtectMemory(g_dsp.iram, DSP_IRAM_BYTE_SIZE, false);
Host::CodeLoaded(code, size);
Host::CodeLoaded(addr, size);
NOTICE_LOG(DSPLLE, "*** Copy new UCode from 0x%08x to 0x%04x (crc: %8x)", addr, dsp_addr,
g_dsp.iram_crc);
return reinterpret_cast<u8*>(dst);
return reinterpret_cast<u8*>(g_dsp.iram) + dsp_addr;
}
static const u8* gdsp_idma_out(u16 dsp_addr, u32 addr, u32 size)
@ -309,80 +301,23 @@ static const u8* gdsp_idma_out(u16 dsp_addr, u32 addr, u32 size)
return nullptr;
}
#if defined(_M_X86) || defined(_M_X86_64)
// Byte-shuffle control vector for _mm_shuffle_epi8: within every 16-bit lane the two
// bytes trade places (output byte 0 <- input byte 1, output byte 1 <- input byte 0, and
// so on up to bytes 14/15), i.e. a 16-bit byte swap applied to eight lanes at once.
static const __m128i s_mask = _mm_set_epi32(0x0E0F0C0DL, 0x0A0B0809L, 0x06070405L, 0x02030001L);
// SSSE3 path for a DMA transfer from g_dsp.cpu_ram into the buffer `dst`.
//
// dsp_addr: byte offset into `dst` at which writing starts.
// addr:     byte offset into g_dsp.cpu_ram; each access is masked with 0x7FFFFFFF.
// size:     number of bytes to move. The loop always handles whole 16-byte vectors, so
//           a size that is not a multiple of 16 would read and write past the requested
//           range; the visible caller (gdsp_ddma_in) only takes this path when
//           size % 16 == 0.
// dst:      base of the destination buffer, addressed in bytes.
FUNCTION_TARGET_SSSE3
static void gdsp_ddma_in_SSSE3(u16 dsp_addr, u32 addr, u32 size, u8* dst)
{
for (u32 i = 0; i < size; i += 16)
{
// Unaligned 16-byte load, byte-swap every 16-bit lane via s_mask, unaligned store.
_mm_storeu_si128(
(__m128i*)&dst[dsp_addr + i],
_mm_shuffle_epi8(_mm_loadu_si128((__m128i*)&g_dsp.cpu_ram[(addr + i) & 0x7FFFFFFF]),
s_mask));
}
}
// SSSE3 path for a DMA transfer from the buffer `src` out to g_dsp.cpu_ram.
//
// dsp_addr: byte offset into `src` at which reading starts.
// addr:     byte offset into g_dsp.cpu_ram; each access is masked with 0x7FFFFFFF.
// size:     number of bytes to move. Whole 16-byte vectors are always transferred, so
//           size is expected to be a multiple of 16; the visible caller (gdsp_ddma_out)
//           checks size % 16 == 0 before calling.
// src:      base of the source buffer, addressed in bytes.
FUNCTION_TARGET_SSSE3
static void gdsp_ddma_out_SSSE3(u16 dsp_addr, u32 addr, u32 size, const u8* src)
{
for (u32 i = 0; i < size; i += 16)
{
// Unaligned 16-byte load from src, byte-swap every 16-bit lane via s_mask, unaligned store.
_mm_storeu_si128((__m128i*)&g_dsp.cpu_ram[(addr + i) & 0x7FFFFFFF],
_mm_shuffle_epi8(_mm_loadu_si128((__m128i*)&src[dsp_addr + i]), s_mask));
}
}
#endif
// TODO: These should eat clock cycles.
static const u8* gdsp_ddma_in(u16 dsp_addr, u32 addr, u32 size)
{
u8* dst = reinterpret_cast<u8*>(g_dsp.dram);
#if defined(_M_X86) || defined(_M_X86_64)
if (cpu_info.bSSSE3 && !(size % 16))
{
gdsp_ddma_in_SSSE3(dsp_addr, addr, size, dst);
}
else
#endif
{
for (u32 i = 0; i < size; i += 2)
{
*(u16*)&dst[dsp_addr + i] =
Common::swap16(*(const u16*)&g_dsp.cpu_ram[(addr + i) & 0x7FFFFFFF]);
}
}
Host::DMAToDSP(g_dsp.dram + dsp_addr / 2, addr, size);
DEBUG_LOG(DSPLLE, "*** ddma_in RAM (0x%08x) -> DRAM_DSP (0x%04x) : size (0x%08x)", addr,
dsp_addr / 2, size);
return dst + dsp_addr;
return reinterpret_cast<u8*>(g_dsp.dram) + dsp_addr;
}
static const u8* gdsp_ddma_out(u16 dsp_addr, u32 addr, u32 size)
{
const u8* src = reinterpret_cast<const u8*>(g_dsp.dram);
#ifdef _M_X86
if (cpu_info.bSSSE3 && !(size % 16))
{
gdsp_ddma_out_SSSE3(dsp_addr, addr, size, src);
}
else
#endif
{
for (u32 i = 0; i < size; i += 2)
{
*(u16*)&g_dsp.cpu_ram[(addr + i) & 0x7FFFFFFF] =
Common::swap16(*(const u16*)&src[dsp_addr + i]);
}
}
Host::DMAFromDSP(g_dsp.dram + dsp_addr / 2, addr, size);
DEBUG_LOG(DSPLLE, "*** ddma_out DRAM_DSP (0x%04x) -> RAM (0x%08x) : size (0x%08x)", dsp_addr / 2,
addr, size);
return src + dsp_addr;
return reinterpret_cast<const u8*>(g_dsp.dram) + dsp_addr;
}
static void gdsp_do_dma()

View File

@ -17,10 +17,13 @@ namespace DSP::Host
{
u8 ReadHostMemory(u32 addr);
void WriteHostMemory(u8 value, u32 addr);
void DMAToDSP(u16* dst, u32 addr, u32 size);
void DMAFromDSP(const u16* src, u32 addr, u32 size);
void OSD_AddMessage(std::string str, u32 ms);
bool OnThread();
bool IsWiiHost();
void InterruptRequest();
void CodeLoaded(const u8* ptr, int size);
void CodeLoaded(u32 addr, size_t size);
void CodeLoaded(const u8* ptr, size_t size);
void UpdateDebugger();
} // namespace DSP::Host

View File

@ -16,6 +16,7 @@
#include "Core/DSP/Jit/x64/DSPEmitter.h"
#include "Core/HW/DSP.h"
#include "Core/HW/DSPLLE/DSPSymbols.h"
#include "Core/HW/Memmap.h"
#include "Core/Host.h"
#include "VideoCommon/OnScreenDisplay.h"
@ -36,6 +37,16 @@ void WriteHostMemory(u8 value, u32 addr)
DSP::WriteARAM(value, addr);
}
// Host hook for DMA transfers into DSP memory; forwards straight to
// Memory::CopyFromEmuSwapped.
//
// dst:  destination buffer of 16-bit words (the visible callers pass pointers into
//       g_dsp.iram / g_dsp.dram).
// addr: source address in emulated memory, passed through unchanged.
// size: transfer length, passed through unchanged.
// NOTE(review): presumably `size` is a byte count and each 16-bit word is byte-swapped,
// matching the swap16 loops this call replaces - confirm against
// Memory::CopyFromEmuSwapped.
void DMAToDSP(u16* dst, u32 addr, u32 size)
{
Memory::CopyFromEmuSwapped(dst, addr, size);
}
// Host hook for DMA transfers out of DSP memory; forwards straight to
// Memory::CopyToEmuSwapped (note that the callee takes the address first).
//
// src:  source buffer of 16-bit words (the visible caller passes a pointer into
//       g_dsp.dram).
// addr: destination address in emulated memory, passed through unchanged.
// size: transfer length, passed through unchanged.
// NOTE(review): presumably `size` is a byte count and each 16-bit word is byte-swapped,
// matching the swap16 loops this call replaces - confirm against
// Memory::CopyToEmuSwapped.
void DMAFromDSP(const u16* src, u32 addr, u32 size)
{
Memory::CopyToEmuSwapped(addr, src, size);
}
void OSD_AddMessage(std::string str, u32 ms)
{
OSD::AddMessage(std::move(str), ms);
@ -57,8 +68,14 @@ void InterruptRequest()
DSP::GenerateDSPInterruptFromDSPEmu(DSP::INT_DSP);
}
void CodeLoaded(const u8* ptr, int size)
// Address-based overload: resolves `addr` to a host pointer with Memory::GetPointer and
// hands it, together with `size`, to the pointer-based CodeLoaded overload.
// NOTE(review): the pointer returned by Memory::GetPointer is used unchecked, and the
// pointer overload immediately hashes the buffer - confirm that it cannot be null for
// the addresses that reach this function.
void CodeLoaded(u32 addr, size_t size)
{
CodeLoaded(Memory::GetPointer(addr), size);
}
void CodeLoaded(const u8* ptr, size_t size)
{
g_dsp.iram_crc = Common::HashEctor(ptr, size);
if (SConfig::GetInstance().m_DumpUCode)
{
DSP::DumpDSPCode(ptr, size, g_dsp.iram_crc);

View File

@ -78,8 +78,10 @@ void DSPLLE::DoState(PointerWrap& p)
Common::UnWriteProtectMemory(g_dsp.iram, DSP_IRAM_BYTE_SIZE, false);
p.DoArray(g_dsp.iram, DSP_IRAM_SIZE);
Common::WriteProtectMemory(g_dsp.iram, DSP_IRAM_BYTE_SIZE, false);
// TODO: This uses the wrong endianness (producing bad disassembly)
// and a bogus byte count (producing bad hashes)
if (p.GetMode() == PointerWrap::MODE_READ)
Host::CodeLoaded((const u8*)g_dsp.iram, DSP_IRAM_BYTE_SIZE);
Host::CodeLoaded(reinterpret_cast<const u8*>(g_dsp.iram), DSP_IRAM_BYTE_SIZE);
p.DoArray(g_dsp.dram, DSP_DRAM_SIZE);
p.Do(g_init_hax);
p.Do(m_cycle_count);
@ -186,10 +188,6 @@ bool DSPLLE::Initialize(bool wii, bool dsp_thread)
m_wii = wii;
m_is_dsp_on_thread = dsp_thread;
// DSPLLE directly accesses the fastmem arena.
// TODO: The fastmem arena is only supposed to be used by the JIT:
// among other issues, its size is only 1GB on 32-bit targets.
g_dsp.cpu_ram = Memory::physical_base;
DSPCore_Reset();
InitInstructionTable();

View File

@ -22,6 +22,12 @@ u8 DSP::Host::ReadHostMemory(u32 addr)
void DSP::Host::WriteHostMemory(u8 value, u32 addr)
{
}
// Empty stub of DSP::Host::DMAToDSP: all arguments are ignored and `dst` is left untouched.
void DSP::Host::DMAToDSP(u16* dst, u32 addr, u32 size)
{
}
// Empty stub of DSP::Host::DMAFromDSP: all arguments are ignored and nothing is written.
void DSP::Host::DMAFromDSP(const u16* src, u32 addr, u32 size)
{
}
// Empty stub of DSP::Host::OSD_AddMessage: the message and duration are discarded.
void DSP::Host::OSD_AddMessage(std::string str, u32 ms)
{
}
@ -33,7 +39,10 @@ bool DSP::Host::IsWiiHost()
{
return false;
}
void DSP::Host::CodeLoaded(const u8* ptr, int size)
// Empty stub of the address-based DSP::Host::CodeLoaded overload: both arguments are ignored.
void DSP::Host::CodeLoaded(u32 addr, size_t size)
{
}
// Empty stub of the pointer-based DSP::Host::CodeLoaded overload: both arguments are ignored.
void DSP::Host::CodeLoaded(const u8* ptr, size_t size)
{
}
void DSP::Host::InterruptRequest()