EE: Improve the nomenclature used in the EE cache

This commit is contained in:
Gabriel Correia 2024-06-22 00:58:11 -03:00
parent bafa64ea82
commit 7037d9c363
7 changed files with 94 additions and 82 deletions

View File

@ -18,9 +18,9 @@ namespace cosmic::ee {
union CacheWay {
u32 u[16];
u64 large[8];
os::vec vec[4]{};
os::vec vec[4];
};
std::array<CacheWay, 2> ec;
std::array<CacheWay, 2> ways;
};
enum CacheMode {
Instruction,

View File

@ -17,17 +17,17 @@ namespace cosmic::ee {
if (!lineLayer) {
throw Cop0Err("Address {:#x} isn't cached or doesn't have a valid tag referencing it", address);
}
const auto& cont{cachedData.ec[lineLayer - 1]};
const auto& cont{cachedData.ways[lineLayer - 1]};
return cont.vec[(address >> 4) & 3];
}
void CtrlCop::invIndexed(u32 address) {
auto cc{getCache(address, true)};
cc.tags[0] &= ~dirtyBit;
cc.tags[1] &= ~dirtyBit;
cc.lrf[0] = cc.lrf[1] = {
auto invWaysAt{getCache(address, true)};
invWaysAt.tags[0] &= ~dirtyBit;
invWaysAt.tags[1] &= ~dirtyBit;
invWaysAt.lrf[0] = invWaysAt.lrf[1] = {
};
std::memset(cc.ec.data(), 0, sizeof(cc.ec));
std::memset(invWaysAt.ways.data(), 0, sizeof(invWaysAt.ways));
}
bool CtrlCop::isCacheHit(u32 address, u8 lane, CacheMode mode) {
// Each cache line is indexed by virtual address
@ -48,51 +48,51 @@ namespace cosmic::ee {
}
return {};
}
void CtrlCop::loadCacheLine(u32 address, EeMipsCore& core, CacheMode mode) {
void CtrlCop::loadCacheLine(u32 address, EeMipsCore& mips, CacheMode mode) {
auto masterIndex{
getCachePfn(address, mode)};
auto pear{
auto writableCache{
getCache(address, true, mode)};
assignFlushedCache(pear, masterIndex);
assignFlushedCache(writableCache, masterIndex);
masterIndex |= dirtyBit;
if ((pear.tags[0] != masterIndex &&
pear.tags[1] != masterIndex)) {
if ((writableCache.tags[0] != masterIndex &&
writableCache.tags[1] != masterIndex)) {
throw Cop0Err("No portion of the cache line {:#x} was properly selected! Tags: {:#x}",
masterIndex, fmt::join(pear.tags, ", "));
masterIndex, fmt::join(writableCache.tags, ", "));
}
// Due to the LRF algorithm, we will write to the way that was written last (thus keeping
// the last data among the ways in the cache, waiting for one more miss)
u8 way{pear.lrf[0] && !pear.lrf[1]};
if (!way) {
if (!pear.lrf[0] && pear.lrf[1])
way = 2;
u8 primaryWay{writableCache.lrf[0] && !writableCache.lrf[1]};
if (!primaryWay) {
if (!writableCache.lrf[0] && writableCache.lrf[1])
primaryWay = 2;
}
if (!way)
if (!primaryWay)
// Means that the entire cache line is available for writing
way = 255;
primaryWay = 255;
u8 missPenalty{40};
switch (way) {
switch (primaryWay) {
case 0xff:
pear.ec[1].vec[1] = core.mipsRead<os::vec>((address + 64));
pear.ec[1].vec[1] = core.mipsRead<os::vec>((address + 64) + 16);
pear.ec[1].vec[2] = core.mipsRead<os::vec>((address + 64) + 16 * 2);
pear.ec[1].vec[3] = core.mipsRead<os::vec>((address + 64) + 16 * 3);
writableCache.ways[1].vec[1] = mips.mipsRead<os::vec>((address + 64));
writableCache.ways[1].vec[1] = mips.mipsRead<os::vec>((address + 64) + 16);
writableCache.ways[1].vec[2] = mips.mipsRead<os::vec>((address + 64) + 16 * 2);
writableCache.ways[1].vec[3] = mips.mipsRead<os::vec>((address + 64) + 16 * 3);
missPenalty *= 4;
way = 1;
primaryWay = 1;
case 1 ... 2:
pear.ec[way - 1].vec[0] = core.mipsRead<os::vec>(address + 0);
pear.ec[way - 1].vec[1] = core.mipsRead<os::vec>(address + 16);
pear.ec[way - 1].vec[2] = core.mipsRead<os::vec>(address + 16 * 2);
pear.ec[way - 1].vec[3] = core.mipsRead<os::vec>(address + 16 * 3);
writableCache.ways[primaryWay - 1].vec[0] = mips.mipsRead<os::vec>(address + 0);
writableCache.ways[primaryWay - 1].vec[1] = mips.mipsRead<os::vec>(address + 16);
writableCache.ways[primaryWay - 1].vec[2] = mips.mipsRead<os::vec>(address + 16 * 2);
writableCache.ways[primaryWay - 1].vec[3] = mips.mipsRead<os::vec>(address + 16 * 3);
if (way != 0xff)
if (primaryWay != 0xff)
missPenalty *= 2;
break;
}
core.runCycles -= missPenalty;
mips.runCycles -= missPenalty;
}
u32 CtrlCop::getCachePfn(u32 address, CacheMode mode) {
if (mode == Instruction)
@ -101,28 +101,29 @@ namespace cosmic::ee {
return static_cast<u16>(address >> 12);
}
void CtrlCop::assignFlushedCache(CopCacheLine& eec, u32 tag, CacheMode mode) {
void CtrlCop::assignFlushedCache(CopCacheLine& mixedCache, u32 tag, CacheMode mode) {
// The EE uses a Least Recently Filled (LRF) algorithm to
// determine which way to load data into
u32 assign{};
const std::array<u32, 2> mix{
eec.tags[0] & dirtyBit,
eec.tags[1] & dirtyBit
mixedCache.tags[0] & dirtyBit, mixedCache.tags[1] & dirtyBit
};
if (mix[0] && !mix[1]) assign = 1;
if (mix[1] && !mix[0]) assign = 2;
if (mix[0] && !mix[1])
assign = 1;
if (mix[1] && !mix[0])
assign = 2;
if (assign) {
assign--;
eec.lrf[assign] = true;
mixedCache.lrf[assign] = true;
} else {
// The row to fill is the XOR of the LRF bits
assign = (eec.lrf[0] ^ eec.lrf[1]);
eec.lrf[assign] = true;
assign = (mixedCache.lrf[0] ^ mixedCache.lrf[1]);
mixedCache.lrf[assign] = true;
}
// Here is where we write the tag bits
eec.tags[assign] = tag | dirtyBit;
mixedCache.tags[assign] = tag | dirtyBit;
}
CopCacheLine& CtrlCop::getCache(u32 mem, bool write, CacheMode mode) {
u32 cacheIndex;
@ -134,34 +135,37 @@ namespace cosmic::ee {
cacheIndex = (mem >> 6) & 0x3f;
selectedCache = dataCache;
}
std::array<Optional<u8*>, 2> wb{
Optional(virtMap[selectedCache[cacheIndex].tags[0] >> 12]),
Optional(virtMap[selectedCache[cacheIndex].tags[1] >> 12])
};
std::array<bool, 2> valid{
selectedCache[cacheIndex].lrf[0],
selectedCache[cacheIndex].lrf[1]
};
const auto firstWayLayer{selectedCache[cacheIndex].tags[0]};
if (*wb[0] == virtMap[mem >> 12] && valid[0])
return selectedCache[cacheIndex];
if (*wb[1] == virtMap[mem >> 12] && valid[1])
return selectedCache[cacheIndex];
const u32 way{(
(selectedCache[cacheIndex].tags[0] >> 6) & 1) ^ ((selectedCache[cacheIndex].tags[1] >> 6) & 1)
const auto secondWayLayer{selectedCache[cacheIndex].tags[1]};
std::array<Optional<u8*>, 2> maps{
Optional(virtMap[firstWayLayer >> 12]),
Optional(virtMap[secondWayLayer >> 12])
};
const auto isDirty{static_cast<bool>(selectedCache[cacheIndex].tags[way] & dirtyBit)};
const auto firstLrf{selectedCache[cacheIndex].lrf[0]};
const auto secondLrf{selectedCache[cacheIndex].lrf[1]};
for (u32 layers{}; layers < 2; layers++) {
if (maps[0] == virtMap[mem >> 12] && layers == 0 ? firstLrf : secondLrf)
return selectedCache[cacheIndex];
}
const u32 way{((firstWayLayer >> 6) & 1) ^ ((secondWayLayer >> 6) & 1)};
const auto isDirty{static_cast<bool>(
way == 0 ? firstWayLayer & dirtyBit : secondWayLayer & dirtyBit)};
if (write && mode == Data && isDirty) {
uintptr_t wrm{*(*wb[way]) + (mem & 0xfc0)};
BitCast<u64*>(wrm)[0] = selectedCache[cacheIndex].ec[way].large[0];
BitCast<u64*>(wrm)[1] = selectedCache[cacheIndex].ec[way].large[1];
BitCast<u64*>(wrm)[2] = selectedCache[cacheIndex].ec[way].large[2];
BitCast<u64*>(wrm)[3] = selectedCache[cacheIndex].ec[way].large[3];
BitCast<u64*>(wrm)[4] = selectedCache[cacheIndex].ec[way].large[4];
BitCast<u64*>(wrm)[5] = selectedCache[cacheIndex].ec[way].large[5];
BitCast<u64*>(wrm)[6] = selectedCache[cacheIndex].ec[way].large[6];
BitCast<u64*>(wrm)[7] = selectedCache[cacheIndex].ec[way].large[7];
uintptr_t wrm{*(*maps[way]) + (mem & 0xfc0)};
BitCast<u64*>(wrm)[0] = selectedCache[cacheIndex].ways[way].large[0];
BitCast<u64*>(wrm)[1] = selectedCache[cacheIndex].ways[way].large[1];
BitCast<u64*>(wrm)[2] = selectedCache[cacheIndex].ways[way].large[2];
BitCast<u64*>(wrm)[3] = selectedCache[cacheIndex].ways[way].large[3];
BitCast<u64*>(wrm)[4] = selectedCache[cacheIndex].ways[way].large[4];
BitCast<u64*>(wrm)[5] = selectedCache[cacheIndex].ways[way].large[5];
BitCast<u64*>(wrm)[6] = selectedCache[cacheIndex].ways[way].large[6];
BitCast<u64*>(wrm)[7] = selectedCache[cacheIndex].ways[way].large[7];
}
if (write) {
// If we are writing to the cache, the dirty bit must be set

View File

@ -144,20 +144,23 @@ namespace cosmic::ee {
GPRs[0] = code;
redoTlbMapping(); break;
case 14: // $14: EPC
inError = true; break;
case 30:
inAnException = true; break;
case 30:
inError = true; break;
case 11:
cause.timerIp = {};
default:
GPRs[reg] = code; break;
}
if (inError &&
const auto isAExcept{
isAHVector(code) &&
haveAException()) {
ePc = code;
}
if (inAnException && isAHVector(code) && haveAException()) {
haveAException()};
if (inError && isAExcept) {
errorPc = code;
}
if (inAnException && isAExcept) {
ePc = code;
}
}
}

View File

@ -56,11 +56,11 @@ namespace cosmic::ee {
}
u32 EeMipsCore::fetchByPc() {
const u32 orderPC{lastPc};
const u32 orderPc{lastPc};
[[unlikely]] if (!cop0.virtCache->isCached(eePc)) {
// However, the EE loads two instructions at once
u32 punishment{8};
if ((orderPC + 4) != eePc) {
if ((orderPc + 4) != eePc) {
// When reading an instruction out of sequential order, a penalty of 32 cycles is applied
punishment = 32;
}
@ -82,26 +82,27 @@ namespace cosmic::ee {
const u32 stripPcAddr(u32 addr) const {
return addr & 0xffff'fff0;
}
auto& operator[](const u64 address) {
auto& operator[](const u32 address) {
return nested[(address & 0xf) / 4];
}
u32 basePc;
bool isValid{};
std::array<u32, 4> nested;
};
static CachedAddress cached{};
static CachedAddress cached;
if (cop0.virtCache->isCached(address)) {
if (!cop0.isCacheHit(address, 2)) {
cop0.loadCacheLine(address, *this);
cached.isValid = false;
}
} else {
runCycles -= 8 / 2;
return mipsRead<u32>(address);
}
u32 currBase{cached.stripPcAddr(address)};
if (cached.isValid)
cached.isValid = currBase == cached.basePc;
if (cached.basePc != currBase)
cached.isValid = false;
if (!cached.isValid) {
const auto fasterInstructions{cop0.readCache(address)};
@ -138,6 +139,7 @@ namespace cosmic::ee {
if (executor)
executor->performInvalidation(address);
}
void EeMipsCore::branchByCondition(bool cond, i32 jumpRel) {
if (!cond)
return;

View File

@ -94,7 +94,7 @@ namespace cosmic::gs {
return (whatPath == 1) &&
paths[whatPath].tag.isCompleted();
}
void GifBridge::transfer2Gif(os::vec packet) {
void GifBridge::transfer2Gif(const os::vec& packet) {
std::array<u64, 2> package{};
for (u8 pack{}; pack < 2; pack++)
package[pack] = packet.to64(pack);

View File

@ -65,7 +65,7 @@ namespace cosmic::gs {
void update(u32 cycles);
private:
void transfer2Gif(os::vec packet);
void transfer2Gif(const os::vec& packet);
void decodeGifTag(GifTag& unpacked, u64 packet[2]);
void uploadPackedData(GifTag& dsTag, u64 packet[2]);
void queueReset();

View File

@ -3,6 +3,7 @@
#include <common/types.h>
namespace cosmic::os {
struct vec {
vec() = default;
vec(u64 qWord0, u64 qWord1 = 0) {
native = vsetq_lane_u64(qWord0, native, 0);
native = vsetq_lane_u64(qWord1, native, 1);
@ -10,6 +11,7 @@ namespace cosmic::os {
vec(const u128 val) {
native = val;
}
/*
vec() {
auto mask{static_cast<u128>(vmovq_n_u64(0))};
// The mask will be combined with the first value passed to vsetq_lane_u64 to form
@ -19,10 +21,11 @@ namespace cosmic::os {
native = vandq_u64(native, mask);
}
*/
auto get() const {
return native;
}
inline u32 to32(u32 lane) const {
inline u32 to32(const u64 lane) const {
switch (lane) {
case 0: return vgetq_lane_u32(native, 0);
case 1: return vgetq_lane_u32(native, 1);
@ -31,7 +34,7 @@ namespace cosmic::os {
}
return {};
}
inline u64 to64(u32 lane) const {
inline u64 to64(const u64 lane) const {
return lane == 0 ? vgetq_lane_u64(native, 0) : vgetq_lane_u64(native, 1);
}
template <typename T, u64 lane = 0>