From 7037d9c36373d8f06f46f589fc0173d7980a5408 Mon Sep 17 00:00:00 2001 From: Gabriel Correia Date: Sat, 22 Jun 2024 00:58:11 -0300 Subject: [PATCH] `EE`: Improve the nomenclature used in the EE cache --- app/src/main/cpp/cosmic/ee/cop0.h | 4 +- app/src/main/cpp/cosmic/ee/cop_cache.cpp | 130 +++++++++++----------- app/src/main/cpp/cosmic/ee/cop_misc.cpp | 17 +-- app/src/main/cpp/cosmic/ee/ee_core.cpp | 14 ++- app/src/main/cpp/cosmic/gs/gif_bridge.cpp | 2 +- app/src/main/cpp/cosmic/gs/gif_bridge.h | 2 +- app/src/main/cpp/cosmic/os/neon_simd.h | 7 +- 7 files changed, 94 insertions(+), 82 deletions(-) diff --git a/app/src/main/cpp/cosmic/ee/cop0.h b/app/src/main/cpp/cosmic/ee/cop0.h index a521292..5cca797 100644 --- a/app/src/main/cpp/cosmic/ee/cop0.h +++ b/app/src/main/cpp/cosmic/ee/cop0.h @@ -18,9 +18,9 @@ namespace cosmic::ee { union CacheWay { u32 u[16]; u64 large[8]; - os::vec vec[4]{}; + os::vec vec[4]; }; - std::array ec; + std::array ways; }; enum CacheMode { Instruction, diff --git a/app/src/main/cpp/cosmic/ee/cop_cache.cpp b/app/src/main/cpp/cosmic/ee/cop_cache.cpp index 2fadc21..75ca717 100644 --- a/app/src/main/cpp/cosmic/ee/cop_cache.cpp +++ b/app/src/main/cpp/cosmic/ee/cop_cache.cpp @@ -17,17 +17,17 @@ namespace cosmic::ee { if (!lineLayer) { throw Cop0Err("Address {:#x} isn't cached or doesn't have a valid tag referencing it", address); } - const auto& cont{cachedData.ec[lineLayer - 1]}; + const auto& cont{cachedData.ways[lineLayer - 1]}; return cont.vec[(address >> 4) & 3]; } void CtrlCop::invIndexed(u32 address) { - auto cc{getCache(address, true)}; - cc.tags[0] &= ~dirtyBit; - cc.tags[1] &= ~dirtyBit; - cc.lrf[0] = cc.lrf[1] = { + auto invWaysAt{getCache(address, true)}; + invWaysAt.tags[0] &= ~dirtyBit; + invWaysAt.tags[1] &= ~dirtyBit; + invWaysAt.lrf[0] = invWaysAt.lrf[1] = { }; - std::memset(cc.ec.data(), 0, sizeof(cc.ec)); + std::memset(invWaysAt.ways.data(), 0, sizeof(invWaysAt.ways)); } bool CtrlCop::isCacheHit(u32 address, u8 lane, CacheMode mode) { // Each cache line is indexed by virtual address @@ -48,51 +48,51 @@ namespace cosmic::ee { } return {}; } - void CtrlCop::loadCacheLine(u32 address, EeMipsCore& core, CacheMode mode) { + void CtrlCop::loadCacheLine(u32 address, EeMipsCore& mips, CacheMode mode) { auto masterIndex{ getCachePfn(address, mode)}; - auto pear{ + auto writableCache{ getCache(address, true, mode)}; - assignFlushedCache(pear, masterIndex); + assignFlushedCache(writableCache, masterIndex); masterIndex |= dirtyBit; - if ((pear.tags[0] != masterIndex && - pear.tags[1] != masterIndex)) { + if ((writableCache.tags[0] != masterIndex && + writableCache.tags[1] != masterIndex)) { throw Cop0Err("No portion of the cache line {:#x} was properly selected! Tags: {:#x}", - masterIndex, fmt::join(pear.tags, ", ")); + masterIndex, fmt::join(writableCache.tags, ", ")); } // Due to the LRF algorithm, we will write to the way that was written last (thus keeping // the last data among the ways in the cache, waiting for one more miss) - u8 way{pear.lrf[0] && !pear.lrf[1]}; - if (!way) { - if (!pear.lrf[0] && pear.lrf[1]) - way = 2; + u8 primaryWay{writableCache.lrf[0] && !writableCache.lrf[1]}; + if (!primaryWay) { + if (!writableCache.lrf[0] && writableCache.lrf[1]) + primaryWay = 2; } - if (!way) + if (!primaryWay) // Means that the entire cache line is available for writing - way = 255; + primaryWay = 255; u8 missPenalty{40}; - switch (way) { + switch (primaryWay) { case 0xff: - pear.ec[1].vec[1] = core.mipsRead((address + 64)); - pear.ec[1].vec[1] = core.mipsRead((address + 64) + 16); - pear.ec[1].vec[2] = core.mipsRead((address + 64) + 16 * 2); - pear.ec[1].vec[3] = core.mipsRead((address + 64) + 16 * 3); + writableCache.ways[1].vec[1] = mips.mipsRead((address + 64)); + writableCache.ways[1].vec[1] = mips.mipsRead((address + 64) + 16); + writableCache.ways[1].vec[2] = mips.mipsRead((address + 64) + 16 * 2); + writableCache.ways[1].vec[3] = mips.mipsRead((address + 64) + 16 * 3); missPenalty *= 4; - way = 1; + primaryWay = 1; case 1 ... 2: - pear.ec[way - 1].vec[0] = core.mipsRead(address + 0); - pear.ec[way - 1].vec[1] = core.mipsRead(address + 16); - pear.ec[way - 1].vec[2] = core.mipsRead(address + 16 * 2); - pear.ec[way - 1].vec[3] = core.mipsRead(address + 16 * 3); + writableCache.ways[primaryWay - 1].vec[0] = mips.mipsRead(address + 0); + writableCache.ways[primaryWay - 1].vec[1] = mips.mipsRead(address + 16); + writableCache.ways[primaryWay - 1].vec[2] = mips.mipsRead(address + 16 * 2); + writableCache.ways[primaryWay - 1].vec[3] = mips.mipsRead(address + 16 * 3); - if (way != 0xff) + if (primaryWay != 0xff) missPenalty *= 2; break; } - core.runCycles -= missPenalty; + mips.runCycles -= missPenalty; } u32 CtrlCop::getCachePfn(u32 address, CacheMode mode) { if (mode == Instruction) @@ -101,28 +101,29 @@ namespace cosmic::ee { return static_cast(address >> 12); } - void CtrlCop::assignFlushedCache(CopCacheLine& eec, u32 tag, CacheMode mode) { + void CtrlCop::assignFlushedCache(CopCacheLine& mixedCache, u32 tag, CacheMode mode) { // The EE uses a Least Recently Filled (LRF) algorithm to // determine which way to load data into u32 assign{}; const std::array mix{ - eec.tags[0] & dirtyBit, - eec.tags[1] & dirtyBit + mixedCache.tags[0] & dirtyBit, mixedCache.tags[1] & dirtyBit }; - if (mix[0] && !mix[1]) assign = 1; - if (mix[1] && !mix[0]) assign = 2; + if (mix[0] && !mix[1]) + assign = 1; + if (mix[1] && !mix[0]) + assign = 2; if (assign) { assign--; - eec.lrf[assign] = true; + mixedCache.lrf[assign] = true; } else { // The row to fill is the XOR of the LFU bits - assign = (eec.lrf[0] ^ eec.lrf[1]); - eec.lrf[assign] = true; + assign = (mixedCache.lrf[0] ^ mixedCache.lrf[1]); + mixedCache.lrf[assign] = true; } // Here is where we write the tag bits - eec.tags[assign] = tag | dirtyBit; + mixedCache.tags[assign] = tag | dirtyBit; } CopCacheLine& CtrlCop::getCache(u32 mem, bool write, CacheMode mode) { u32 cacheIndex; @@ -134,34 +135,37 @@ namespace cosmic::ee { cacheIndex = (mem >> 6) & 0x3f; selectedCache = dataCache; } - std::array, 2> wb{ - Optional(virtMap[selectedCache[cacheIndex].tags[0] >> 12]), - Optional(virtMap[selectedCache[cacheIndex].tags[1] >> 12]) - }; - std::array valid{ - selectedCache[cacheIndex].lrf[0], - selectedCache[cacheIndex].lrf[1] - }; + const auto firstWayLayer{selectedCache[cacheIndex].tags[0]}; - if (*wb[0] == virtMap[mem >> 12] && valid[0]) - return selectedCache[cacheIndex]; - if (*wb[1] == virtMap[mem >> 12] && valid[1]) - return selectedCache[cacheIndex]; - const u32 way{( - (selectedCache[cacheIndex].tags[0] >> 6) & 1) ^ ((selectedCache[cacheIndex].tags[1] >> 6) & 1) + const auto secondWayLayer{selectedCache[cacheIndex].tags[1]}; + + std::array, 2> maps{ + Optional(virtMap[firstWayLayer >> 12]), + Optional(virtMap[secondWayLayer >> 12]) }; - const auto isDirty{static_cast(selectedCache[cacheIndex].tags[way] & dirtyBit)}; + const auto firstLrf{selectedCache[cacheIndex].lrf[0]}; + const auto secondLrf{selectedCache[cacheIndex].lrf[1]}; + + for (u32 layers{}; layers < 2; layers++) { + if (maps[0] == virtMap[mem >> 12] && layers == 0 ? firstLrf : secondLrf) + return selectedCache[cacheIndex]; + } + + const u32 way{((firstWayLayer >> 6) & 1) ^ ((secondWayLayer >> 6) & 1)}; + + const auto isDirty{static_cast( + way == 0 ? firstWayLayer & dirtyBit : secondWayLayer & dirtyBit)}; if (write && mode == Data && isDirty) { - uintptr_t wrm{*(*wb[way]) + (mem & 0xfc0)}; - BitCast(wrm)[0] = selectedCache[cacheIndex].ec[way].large[0]; - BitCast(wrm)[1] = selectedCache[cacheIndex].ec[way].large[1]; - BitCast(wrm)[2] = selectedCache[cacheIndex].ec[way].large[2]; - BitCast(wrm)[3] = selectedCache[cacheIndex].ec[way].large[3]; - BitCast(wrm)[4] = selectedCache[cacheIndex].ec[way].large[4]; - BitCast(wrm)[5] = selectedCache[cacheIndex].ec[way].large[5]; - BitCast(wrm)[6] = selectedCache[cacheIndex].ec[way].large[6]; - BitCast(wrm)[7] = selectedCache[cacheIndex].ec[way].large[7]; + uintptr_t wrm{*(*maps[way]) + (mem & 0xfc0)}; + BitCast(wrm)[0] = selectedCache[cacheIndex].ways[way].large[0]; + BitCast(wrm)[1] = selectedCache[cacheIndex].ways[way].large[1]; + BitCast(wrm)[2] = selectedCache[cacheIndex].ways[way].large[2]; + BitCast(wrm)[3] = selectedCache[cacheIndex].ways[way].large[3]; + BitCast(wrm)[4] = selectedCache[cacheIndex].ways[way].large[4]; + BitCast(wrm)[5] = selectedCache[cacheIndex].ways[way].large[5]; + BitCast(wrm)[6] = selectedCache[cacheIndex].ways[way].large[6]; + BitCast(wrm)[7] = selectedCache[cacheIndex].ways[way].large[7]; } if (write) { // If we are writing to the cache, the dirty bit must be set diff --git a/app/src/main/cpp/cosmic/ee/cop_misc.cpp b/app/src/main/cpp/cosmic/ee/cop_misc.cpp index 61149ec..3088618 100644 --- a/app/src/main/cpp/cosmic/ee/cop_misc.cpp +++ b/app/src/main/cpp/cosmic/ee/cop_misc.cpp @@ -144,20 +144,23 @@ namespace cosmic::ee { GPRs[0] = code; redoTlbMapping(); break; case 14: // $14: EPC - inError = true; break; - case 30: inAnException = true; break; + case 30: + inError = true; break; + case 11: + cause.timerIp = {}; default: GPRs[reg] = code; break; } - if (inError && + const auto isAExcept{ isAHVector(code) && - haveAException()) { - ePc = code; - } - if (inAnException && isAHVector(code) && haveAException()) { + haveAException()}; + if (inError && isAExcept) { errorPc = code; } + if (inAnException && isAExcept) { + ePc = code; + } } } \ No newline at end of file diff --git a/app/src/main/cpp/cosmic/ee/ee_core.cpp b/app/src/main/cpp/cosmic/ee/ee_core.cpp index c62f363..2f0bedb 100644 --- a/app/src/main/cpp/cosmic/ee/ee_core.cpp +++ b/app/src/main/cpp/cosmic/ee/ee_core.cpp @@ -56,11 +56,11 @@ namespace cosmic::ee { } u32 EeMipsCore::fetchByPc() { - const u32 orderPC{lastPc}; + const u32 orderPc{lastPc}; [[unlikely]] if (!cop0.virtCache->isCached(eePc)) { // However, the EE loads two instructions at once u32 punishment{8}; - if ((orderPC + 4) != eePc) { + if ((orderPc + 4) != eePc) { // When reading an instruction out of sequential order, a penalty of 32 cycles is applied punishment = 32; } @@ -82,26 +82,27 @@ namespace cosmic::ee { const u32 stripPcAddr(u32 addr) const { return addr & 0xffff'fff0; } - auto& operator[](const u64 address) { + auto& operator[](const u32 address) { return nested[(address & 0xf) / 4]; } u32 basePc; bool isValid{}; std::array nested; }; - static CachedAddress cached{}; + static CachedAddress cached; if (cop0.virtCache->isCached(address)) { if (!cop0.isCacheHit(address, 2)) { cop0.loadCacheLine(address, *this); + cached.isValid = false; } } else { runCycles -= 8 / 2; return mipsRead(address); } u32 currBase{cached.stripPcAddr(address)}; - if (cached.isValid) - cached.isValid = currBase == cached.basePc; + if (cached.basePc != currBase) + cached.isValid = false; if (!cached.isValid) { const auto fasterInstructions{cop0.readCache(address)}; @@ -138,6 +139,7 @@ namespace cosmic::ee { if (executor) executor->performInvalidation(address); } + void EeMipsCore::branchByCondition(bool cond, i32 jumpRel) { if (!cond) return; diff --git a/app/src/main/cpp/cosmic/gs/gif_bridge.cpp b/app/src/main/cpp/cosmic/gs/gif_bridge.cpp index 4652c82..c1e5a7b 100644 --- a/app/src/main/cpp/cosmic/gs/gif_bridge.cpp +++ b/app/src/main/cpp/cosmic/gs/gif_bridge.cpp @@ -94,7 +94,7 @@ namespace cosmic::gs { return (whatPath == 1) && paths[whatPath].tag.isCompleted(); } - void GifBridge::transfer2Gif(os::vec packet) { + void GifBridge::transfer2Gif(const os::vec& packet) { std::array package{}; for (u8 pack{}; pack < 2; pack++) package[pack] = packet.to64(pack); diff --git a/app/src/main/cpp/cosmic/gs/gif_bridge.h b/app/src/main/cpp/cosmic/gs/gif_bridge.h index 8c60cf6..8eea991 100644 --- a/app/src/main/cpp/cosmic/gs/gif_bridge.h +++ b/app/src/main/cpp/cosmic/gs/gif_bridge.h @@ -65,7 +65,7 @@ namespace cosmic::gs { void update(u32 cycles); private: - void transfer2Gif(os::vec packet); + void transfer2Gif(const os::vec& packet); void decodeGifTag(GifTag& unpacked, u64 packet[2]); void uploadPackedData(GifTag& dsTag, u64 packet[2]); void queueReset(); diff --git a/app/src/main/cpp/cosmic/os/neon_simd.h b/app/src/main/cpp/cosmic/os/neon_simd.h index 56e4373..97fd7c2 100644 --- a/app/src/main/cpp/cosmic/os/neon_simd.h +++ b/app/src/main/cpp/cosmic/os/neon_simd.h @@ -3,6 +3,7 @@ #include namespace cosmic::os { struct vec { + vec() = default; vec(u64 qWord0, u64 qWord1 = 0) { native = vsetq_lane_u64(qWord0, native, 0); native = vsetq_lane_u64(qWord1, native, 1); @@ -10,6 +11,7 @@ namespace cosmic::os { vec(const u128 val) { native = val; } + /* vec() { auto mask{static_cast(vmovq_n_u64(0))}; // The mask will be combined with the first value passed to vsetq_lane_u64 to form @@ -19,10 +21,11 @@ namespace cosmic::os { native = vandq_u64(native, mask); } + */ auto get() const { return native; } - inline u32 to32(u32 lane) const { + inline u32 to32(const u64 lane) const { switch (lane) { case 0: return vgetq_lane_u32(native, 0); case 1: return vgetq_lane_u32(native, 1); @@ -31,7 +34,7 @@ namespace cosmic::os { } return {}; } - inline u64 to64(u32 lane) const { + inline u64 to64(const u64 lane) const { return lane == 0 ? vgetq_lane_u64(native, 0) : vgetq_lane_u64(native, 1); } template