Port over the Exynos cacheline size fix from Dolphin. Thanks to lewurm of the mono project for the discovery and original fix.

See https://github.com/dolphin-emu/dolphin/pull/4204 and https://github.com/mono/mono/pull/3549
This commit is contained in:
Henrik Rydgard 2016-09-10 09:25:06 +02:00
parent f39c603acb
commit 03279e1212
5 changed files with 40 additions and 39 deletions

View File

@ -313,24 +313,40 @@ void ARM64XEmitter::FlushIcache()
// Synchronizes the data and instruction caches for the address range [start, end)
// so that code just written there can be executed.
//
// start: address of the first byte to flush.
// end:   address one past the last byte to flush (the loops below run while addr < end).
//
// NOTE(review): as shown here the preprocessor structure is not well-formed - the
// `#elif` further down follows an `#else` belonging to the same `#if defined(IOS)`.
// This looks like the removed and added sides of a change shown together; confirm
// against the real file before relying on which branches are actually live.
void ARM64XEmitter::FlushIcacheSection(u8* start, u8* end)
{
if (cpu_info.sBugs.bExynos8890Invalidation)
{
// Over invalidate to force this CPU to listen.
// The range is widened by 4096 bytes on both sides; the new start is clamped so it
// never goes below m_startcode.
start = m_startcode + 4096 < start ? start - 4096 : m_startcode;
end += 4096;
}
#if defined(IOS)
// Header file says this is equivalent to: sys_icache_invalidate(start, end - start);
sys_cache_control(kCacheFunctionPrepareForExecution, start, end - start);
#else
// Clang on non-x86 targets, and every Android build, call __clear_cache directly.
#if (defined(__clang__) && !defined(_M_IX86) && !defined(_M_X64)) || defined(ANDROID)
__clear_cache(start, end);
#else
// Other non-x86 compilers use the builtin; on x86 nothing is emitted here.
#if !defined(_M_IX86) && !defined(_M_X64)
__builtin___clear_cache(start, end);
#endif
#endif
#elif !defined(_M_IX86) && !defined(_M_X64)
// Code from Dolphin, contributed by the Mono project.
// Don't rely on GCC's __clear_cache implementation, as it caches
// icache/dcache cache line sizes, that can vary between cores on
// big.LITTLE architectures.
u64 addr, ctr_el0;
// Smallest line sizes observed so far, kept across calls. They start at 0xffff so
// the first call always lowers them.
// NOTE(review): these statics are read and written without synchronization -
// confirm that concurrent callers are either impossible or harmless.
static size_t icache_line_size = 0xffff, dcache_line_size = 0xffff;
size_t isize, dsize;
// Read the Cache Type Register of the core this thread is currently running on.
__asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
// Bits [3:0] (IminLine) and [19:16] (DminLine) hold log2 of the smallest line size
// in 4-byte words, so 4 << field yields the size in bytes.
isize = 4 << ((ctr_el0 >> 0) & 0xf);
dsize = 4 << ((ctr_el0 >> 16) & 0xf);
// use the global minimum cache line size
// (both the remembered static and the local stride are updated to that minimum)
icache_line_size = isize = icache_line_size < isize ? icache_line_size : isize;
dcache_line_size = dsize = dcache_line_size < dsize ? dcache_line_size : dsize;
// Data cache pass: round start down to a line boundary, then step one line at a time.
addr = (u64)start & ~(u64)(dsize - 1);
for (; addr < (u64)end; addr += dsize)
// use "civac" instead of "cvau", as this is the suggested workaround for
// Cortex-A53 errata 819472, 826319, 827319 and 824069.
// (The statement below is the entire loop body.)
__asm__ volatile("dc civac, %0" : : "r"(addr) : "memory");
// Barrier between the data cache pass and the instruction cache pass.
__asm__ volatile("dsb ish" : : : "memory");
// Instruction cache pass: same walk, using the instruction cache line size.
addr = (u64)start & ~(u64)(isize - 1);
for (; addr < (u64)end; addr += isize)
__asm__ volatile("ic ivau, %0" : : "r"(addr) : "memory");
// Barrier after the invalidations, then an instruction synchronization barrier
// before returning to code that may execute the flushed range.
__asm__ volatile("dsb ish" : : : "memory");
__asm__ volatile("isb" : : : "memory");
#endif
}

View File

@ -89,14 +89,15 @@ struct CPUInfo {
bool bXBurst1;
bool bXBurst2;
// Bugs
// Quirks
struct {
// Samsung Galaxy S7 devices (Exynos 8890) have a bug that causes invalidation to work incorrectly.
// This may be caused by interaction between the separate CPU cores.
// Padding jit blocks and over-invalidating seems to "solve" it.
// Only affects ARM64.
bool bExynos8890Invalidation;
} sBugs;
// Samsung Galaxy S7 devices (Exynos 8890) have a big.LITTLE configuration where the cacheline size differs
// between the big and LITTLE cores. GCC's cache clearing function detects the cacheline size on whichever
// core it first runs on and keeps using it, so a later clear running on the other kind of core uses the
// wrong cacheline size. We now use better clearing code on ARM64 that doesn't have this issue. The flag is
// kept as a quirk in case we want to handle this situation differently in the future, but note that our
// current detection won't reliably catch the same problem if it happens on new architectures.
bool bExynos8890DifferingCachelineSizes;
} sQuirks;
// Call Detect()
explicit CPUInfo();

View File

@ -333,14 +333,6 @@ const u8 *Arm64Jit::DoJit(u32 em_address, JitBlock *b) {
if (dontLogBlocks > 0)
dontLogBlocks--;
if (cpu_info.sBugs.bExynos8890Invalidation) {
// What a waste. If we don't do both this and over-invalidate, the device crashes.
// This space won't ever get run, but it's wasted jit cache space.
for (int i = 0; i < 32; ++i) {
HINT(HINT_NOP);
}
}
// Don't forget to zap the newly written instructions in the instruction cache!
FlushIcache();

View File

@ -278,14 +278,6 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
RET();
if (cpu_info.sBugs.bExynos8890Invalidation) {
// Apparently the vertex cache hasn't been the problem, but adding this here for the same
// reasons as the standard jit.
for (int i = 0; i < 32; ++i) {
HINT(HINT_NOP);
}
}
FlushIcache();
if (log) {

View File

@ -526,7 +526,7 @@ extern "C" void Java_org_ppsspp_ppsspp_NativeApp_init
// Unfortunately, on the Samsung Galaxy S7, this isn't in /proc/cpuinfo.
// We also can't read it from __system_property_get.
if (buildBoard == "universal8890") {
cpu_info.sBugs.bExynos8890Invalidation = true;
cpu_info.sQuirks.bExynos8890DifferingCachelineSizes = true;
}
NativeGetAppInfo(&app_name, &app_nice_name, &landscape, &version);