mirror of
https://github.com/FEX-Emu/FEX.git
synced 2024-12-14 09:28:34 +00:00
CoreState: Adds avx_high structure for tracking decoupled AVX halves.
Needed something in between the `InlineJITBlockHeader` and `avx_high` in order to match the 16-byte alignment requirement of avx_high. Chose the `DeferredSignalRefCount` because we hit it quite frequently and it is basically the only 64-bit variable that we end up touching significantly. In the future the CPUState object is going to need to change its view of the object depending on whether the device supports SVE256, but we don't need to front-load that work right now. It'll become significantly easier to support that path once the RCLSE pass gets deleted.
This commit is contained in:
parent
9a71443005
commit
bf812aae8f
@ -97,6 +97,27 @@ static void ClassifyContextStruct(ContextInfo* ContextClassificationInfo, bool S
|
||||
FEXCore::IR::InvalidClass,
|
||||
});
|
||||
|
||||
// DeferredSignalRefCount
|
||||
ContextClassification->emplace_back(ContextMemberInfo {
|
||||
ContextMemberClassification {
|
||||
offsetof(FEXCore::Core::CPUState, DeferredSignalRefCount),
|
||||
sizeof(FEXCore::Core::CPUState::DeferredSignalRefCount),
|
||||
},
|
||||
LastAccessType::NONE,
|
||||
FEXCore::IR::InvalidClass,
|
||||
});
|
||||
|
||||
for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_XMMS; ++i) {
|
||||
ContextClassification->emplace_back(ContextMemberInfo {
|
||||
ContextMemberClassification {
|
||||
offsetof(FEXCore::Core::CPUState, avx_high[0][0]) + FEXCore::Core::CPUState::XMM_SSE_REG_SIZE * i,
|
||||
FEXCore::Core::CPUState::XMM_SSE_REG_SIZE,
|
||||
},
|
||||
LastAccessType::NONE,
|
||||
FEXCore::IR::InvalidClass,
|
||||
});
|
||||
}
|
||||
|
||||
ContextClassification->emplace_back(ContextMemberInfo {
|
||||
ContextMemberClassification {
|
||||
offsetof(FEXCore::Core::CPUState, rip),
|
||||
@ -117,6 +138,48 @@ static void ClassifyContextStruct(ContextInfo* ContextClassificationInfo, bool S
|
||||
});
|
||||
}
|
||||
|
||||
ContextClassification->emplace_back(ContextMemberInfo {
|
||||
ContextMemberClassification {
|
||||
offsetof(FEXCore::Core::CPUState, _pad),
|
||||
sizeof(FEXCore::Core::CPUState::_pad),
|
||||
},
|
||||
LastAccessType::INVALID,
|
||||
FEXCore::IR::InvalidClass,
|
||||
});
|
||||
|
||||
if (SupportsSVE256) {
|
||||
for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_XMMS; ++i) {
|
||||
ContextClassification->emplace_back(ContextMemberInfo {
|
||||
ContextMemberClassification {
|
||||
offsetof(FEXCore::Core::CPUState, xmm.avx.data[0][0]) + FEXCore::Core::CPUState::XMM_AVX_REG_SIZE * i,
|
||||
FEXCore::Core::CPUState::XMM_AVX_REG_SIZE,
|
||||
},
|
||||
LastAccessType::NONE,
|
||||
FEXCore::IR::InvalidClass,
|
||||
});
|
||||
}
|
||||
} else {
|
||||
for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_XMMS; ++i) {
|
||||
ContextClassification->emplace_back(ContextMemberInfo {
|
||||
ContextMemberClassification {
|
||||
offsetof(FEXCore::Core::CPUState, xmm.sse.data[0][0]) + FEXCore::Core::CPUState::XMM_SSE_REG_SIZE * i,
|
||||
FEXCore::Core::CPUState::XMM_SSE_REG_SIZE,
|
||||
},
|
||||
LastAccessType::NONE,
|
||||
FEXCore::IR::InvalidClass,
|
||||
});
|
||||
}
|
||||
|
||||
ContextClassification->emplace_back(ContextMemberInfo {
|
||||
ContextMemberClassification {
|
||||
offsetof(FEXCore::Core::CPUState, xmm.sse.pad[0][0]),
|
||||
static_cast<uint16_t>(FEXCore::Core::CPUState::XMM_SSE_REG_SIZE * FEXCore::Core::CPUState::NUM_XMMS),
|
||||
},
|
||||
LastAccessType::INVALID,
|
||||
FEXCore::IR::InvalidClass,
|
||||
});
|
||||
}
|
||||
|
||||
ContextClassification->emplace_back(ContextMemberInfo {
|
||||
ContextMemberClassification {
|
||||
offsetof(FEXCore::Core::CPUState, es_idx),
|
||||
@ -173,8 +236,8 @@ static void ClassifyContextStruct(ContextInfo* ContextClassificationInfo, bool S
|
||||
|
||||
ContextClassification->emplace_back(ContextMemberInfo {
|
||||
ContextMemberClassification {
|
||||
offsetof(FEXCore::Core::CPUState, _pad),
|
||||
sizeof(FEXCore::Core::CPUState::_pad),
|
||||
offsetof(FEXCore::Core::CPUState, _pad2),
|
||||
sizeof(FEXCore::Core::CPUState::_pad2),
|
||||
},
|
||||
LastAccessType::INVALID,
|
||||
FEXCore::IR::InvalidClass,
|
||||
@ -234,39 +297,6 @@ static void ClassifyContextStruct(ContextInfo* ContextClassificationInfo, bool S
|
||||
FEXCore::IR::InvalidClass,
|
||||
});
|
||||
|
||||
if (SupportsSVE256) {
|
||||
for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_XMMS; ++i) {
|
||||
ContextClassification->emplace_back(ContextMemberInfo {
|
||||
ContextMemberClassification {
|
||||
offsetof(FEXCore::Core::CPUState, xmm.avx.data[0][0]) + FEXCore::Core::CPUState::XMM_AVX_REG_SIZE * i,
|
||||
FEXCore::Core::CPUState::XMM_AVX_REG_SIZE,
|
||||
},
|
||||
LastAccessType::NONE,
|
||||
FEXCore::IR::InvalidClass,
|
||||
});
|
||||
}
|
||||
} else {
|
||||
for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_XMMS; ++i) {
|
||||
ContextClassification->emplace_back(ContextMemberInfo {
|
||||
ContextMemberClassification {
|
||||
offsetof(FEXCore::Core::CPUState, xmm.sse.data[0][0]) + FEXCore::Core::CPUState::XMM_SSE_REG_SIZE * i,
|
||||
FEXCore::Core::CPUState::XMM_SSE_REG_SIZE,
|
||||
},
|
||||
LastAccessType::NONE,
|
||||
FEXCore::IR::InvalidClass,
|
||||
});
|
||||
}
|
||||
|
||||
ContextClassification->emplace_back(ContextMemberInfo {
|
||||
ContextMemberClassification {
|
||||
offsetof(FEXCore::Core::CPUState, xmm.sse.pad[0][0]),
|
||||
static_cast<uint16_t>(FEXCore::Core::CPUState::XMM_SSE_REG_SIZE * FEXCore::Core::CPUState::NUM_XMMS),
|
||||
},
|
||||
LastAccessType::INVALID,
|
||||
FEXCore::IR::InvalidClass,
|
||||
});
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_FLAGS; ++i) {
|
||||
ContextClassification->emplace_back(ContextMemberInfo {
|
||||
ContextMemberClassification {
|
||||
@ -337,21 +367,11 @@ static void ClassifyContextStruct(ContextInfo* ContextClassificationInfo, bool S
|
||||
FEXCore::IR::InvalidClass,
|
||||
});
|
||||
|
||||
// _pad2
|
||||
// _pad3
|
||||
ContextClassification->emplace_back(ContextMemberInfo {
|
||||
ContextMemberClassification {
|
||||
offsetof(FEXCore::Core::CPUState, _pad2),
|
||||
sizeof(FEXCore::Core::CPUState::_pad2),
|
||||
},
|
||||
LastAccessType::NONE,
|
||||
FEXCore::IR::InvalidClass,
|
||||
});
|
||||
|
||||
// DeferredSignalRefCount
|
||||
ContextClassification->emplace_back(ContextMemberInfo {
|
||||
ContextMemberClassification {
|
||||
offsetof(FEXCore::Core::CPUState, DeferredSignalRefCount),
|
||||
sizeof(FEXCore::Core::CPUState::DeferredSignalRefCount),
|
||||
offsetof(FEXCore::Core::CPUState, _pad3),
|
||||
sizeof(FEXCore::Core::CPUState::_pad3),
|
||||
},
|
||||
LastAccessType::NONE,
|
||||
FEXCore::IR::InvalidClass,
|
||||
@ -385,12 +405,21 @@ static void ResetClassificationAccesses(ContextInfo* ContextClassificationInfo,
|
||||
ContextClassification->at(Offset).AccessOffset = 0;
|
||||
ContextClassification->at(Offset).StoreNode = nullptr;
|
||||
};
|
||||
|
||||
size_t Offset = 0;
|
||||
|
||||
///< InlineJITBlockHeader
|
||||
SetAccess(Offset++, LastAccessType::INVALID);
|
||||
|
||||
///< rip
|
||||
// DeferredSignalRefCount
|
||||
SetAccess(Offset++, LastAccessType::INVALID);
|
||||
|
||||
for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_XMMS; ++i) {
|
||||
///< avx_high
|
||||
SetAccess(Offset++, LastAccessType::NONE);
|
||||
}
|
||||
|
||||
// rip
|
||||
SetAccess(Offset++, LastAccessType::NONE);
|
||||
|
||||
///< gregs
|
||||
@ -398,6 +427,19 @@ static void ResetClassificationAccesses(ContextInfo* ContextClassificationInfo,
|
||||
SetAccess(Offset++, LastAccessType::NONE);
|
||||
}
|
||||
|
||||
// pad
|
||||
SetAccess(Offset++, LastAccessType::NONE);
|
||||
|
||||
// xmm
|
||||
for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_XMMS; ++i) {
|
||||
SetAccess(Offset++, LastAccessType::NONE);
|
||||
}
|
||||
|
||||
// xmm_pad
|
||||
if (!SupportsSVE256) {
|
||||
SetAccess(Offset++, LastAccessType::NONE);
|
||||
}
|
||||
|
||||
// Segment indexes
|
||||
SetAccess(Offset++, LastAccessType::NONE);
|
||||
SetAccess(Offset++, LastAccessType::NONE);
|
||||
@ -406,7 +448,7 @@ static void ResetClassificationAccesses(ContextInfo* ContextClassificationInfo,
|
||||
SetAccess(Offset++, LastAccessType::NONE);
|
||||
SetAccess(Offset++, LastAccessType::NONE);
|
||||
|
||||
// Pad
|
||||
// Pad2
|
||||
SetAccess(Offset++, LastAccessType::INVALID);
|
||||
|
||||
// Segments
|
||||
@ -417,16 +459,6 @@ static void ResetClassificationAccesses(ContextInfo* ContextClassificationInfo,
|
||||
SetAccess(Offset++, LastAccessType::NONE);
|
||||
SetAccess(Offset++, LastAccessType::NONE);
|
||||
|
||||
///< xmm
|
||||
for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_XMMS; ++i) {
|
||||
SetAccess(Offset++, LastAccessType::NONE);
|
||||
}
|
||||
|
||||
if (!SupportsSVE256) {
|
||||
///< xmm pad if AVX isn't supported.
|
||||
SetAccess(Offset++, LastAccessType::NONE);
|
||||
}
|
||||
|
||||
///< flags
|
||||
for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_FLAGS; ++i) {
|
||||
SetAccess(Offset++, LastAccessType::NONE);
|
||||
@ -454,10 +486,7 @@ static void ResetClassificationAccesses(ContextInfo* ContextClassificationInfo,
|
||||
///< AbridgedFTW
|
||||
SetAccess(Offset++, LastAccessType::NONE);
|
||||
|
||||
///< _pad2
|
||||
SetAccess(Offset++, LastAccessType::INVALID);
|
||||
|
||||
///< DeferredSignalRefCount
|
||||
// pad3
|
||||
SetAccess(Offset++, LastAccessType::INVALID);
|
||||
}
|
||||
|
||||
|
@ -89,18 +89,27 @@ struct CPUState {
|
||||
};
|
||||
|
||||
uint64_t InlineJITBlockHeader {};
|
||||
// Reference counter for FEX's per-thread deferred signals.
|
||||
// Counts the nesting depth of program sections that cause signals to be deferred.
|
||||
NonAtomicRefCounter<uint64_t> DeferredSignalRefCount;
|
||||
|
||||
// The high 128-bits of AVX registers when not being emulated by SVE256.
|
||||
uint64_t avx_high[16][2];
|
||||
|
||||
uint64_t rip {}; ///< Current core's RIP. May not be entirely accurate while JIT is active
|
||||
uint64_t gregs[16] {};
|
||||
uint64_t _pad {};
|
||||
XMMRegs xmm {};
|
||||
|
||||
// Raw segment register indexes
|
||||
uint16_t es_idx {}, cs_idx {}, ss_idx {}, ds_idx {};
|
||||
uint16_t gs_idx {}, fs_idx {};
|
||||
uint16_t _pad[2];
|
||||
uint16_t _pad2[2];
|
||||
|
||||
// Segment registers holding base addresses
|
||||
uint32_t es_cached {}, cs_cached {}, ss_cached {}, ds_cached {};
|
||||
uint64_t gs_cached {};
|
||||
uint64_t fs_cached {};
|
||||
XMMRegs xmm {};
|
||||
uint8_t flags[48] {};
|
||||
uint64_t pf_raw {};
|
||||
uint64_t af_raw {};
|
||||
@ -113,11 +122,7 @@ struct CPUState {
|
||||
uint16_t FCW {0x37F};
|
||||
uint8_t AbridgedFTW {};
|
||||
|
||||
uint8_t _pad2[5];
|
||||
// Reference counter for FEX's per-thread deferred signals.
|
||||
// Counts the nesting depth of program sections that cause signals to be deferred.
|
||||
NonAtomicRefCounter<uint64_t> DeferredSignalRefCount;
|
||||
|
||||
uint8_t _pad3[5];
|
||||
// PF/AF are statically mapped as-if they were r16/r17 (which do not exist in
|
||||
// x86 otherwise). This allows a straightforward mapping for SRA.
|
||||
static constexpr uint8_t PF_AS_GREG = 16;
|
||||
@ -161,6 +166,7 @@ struct CPUState {
|
||||
};
|
||||
static_assert(std::is_trivially_copyable_v<CPUState>, "Needs to be trivial");
|
||||
static_assert(std::is_standard_layout_v<CPUState>, "This needs to be standard layout");
|
||||
static_assert(offsetof(CPUState, avx_high) % 16 == 0, "avx_high needs to be 128-bit aligned!");
|
||||
static_assert(offsetof(CPUState, xmm) % 32 == 0, "xmm needs to be 256-bit aligned!");
|
||||
static_assert(offsetof(CPUState, mm) % 16 == 0, "mm needs to be 128-bit aligned!");
|
||||
static_assert(offsetof(CPUState, gregs[15]) <= 504, "gregs maximum offset must be <= 504 for ldp/stp to work");
|
||||
|
Loading…
Reference in New Issue
Block a user