IR: infer SRA static class

no need to stick it in the IR.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Alyssa Rosenzweig 2024-05-06 10:53:43 -04:00
parent 74489a4177
commit b91b0e9d65
6 changed files with 29 additions and 30 deletions
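The static class is fully determined by the regular register class, so carrying both operands on LoadRegister/StoreRegister was redundant; the register allocator can recompute it at the point of use. Below is a minimal sketch of that inference, assuming register class constants along the lines of FEXCore::IR's (the helper name and enum values are illustrative, not the actual FEXCore definitions):

#include <cstdint>

namespace sketch {
// Illustrative register class IDs; FEXCore::IR defines its own values.
enum class RegClass : uint32_t { GPR, GPRFixed, FPR, FPRFixed };

// Hypothetical helper mirroring the ternary the RA pass now uses:
//   Op->Class == FPRClass ? FPRFixedClass : GPRFixedClass
constexpr RegClass ToStaticClass(RegClass Class) {
  return Class == RegClass::FPR ? RegClass::FPRFixed : RegClass::GPRFixed;
}

static_assert(ToStaticClass(RegClass::GPR) == RegClass::GPRFixed);
static_assert(ToStaticClass(RegClass::FPR) == RegClass::FPRFixed);
} // namespace sketch

Call sites in the opcode dispatcher, the thunk handler, and the IR definition drop the extra argument accordingly, as the hunks below show.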

View File

@@ -3396,8 +3396,8 @@ void OpDispatchBuilder::CMPSOp(OpcodeArgs) {
   auto Src2 = _LoadMem(GPRClass, Size, Dest_RSI, Size);
   // We'll calculate PF/AF after the loop, so use them as temporaries here.
-  _StoreRegister(Src1, offsetof(FEXCore::Core::CPUState, pf_raw), GPRClass, GPRFixedClass, CTX->GetGPRSize());
-  _StoreRegister(Src2, offsetof(FEXCore::Core::CPUState, af_raw), GPRClass, GPRFixedClass, CTX->GetGPRSize());
+  _StoreRegister(Src1, offsetof(FEXCore::Core::CPUState, pf_raw), GPRClass, CTX->GetGPRSize());
+  _StoreRegister(Src2, offsetof(FEXCore::Core::CPUState, af_raw), GPRClass, CTX->GetGPRSize());
   OrderedNode* TailCounter = LoadGPRRegister(X86State::REG_RCX);
@@ -3436,8 +3436,8 @@ void OpDispatchBuilder::CMPSOp(OpcodeArgs) {
   // Make sure to start a new block after ending this one
   {
     // Grab the sources from the last iteration so we can set flags.
-    auto Src1 = _LoadRegister(offsetof(FEXCore::Core::CPUState, pf_raw), GPRClass, GPRFixedClass, CTX->GetGPRSize());
-    auto Src2 = _LoadRegister(offsetof(FEXCore::Core::CPUState, af_raw), GPRClass, GPRFixedClass, CTX->GetGPRSize());
+    auto Src1 = _LoadRegister(offsetof(FEXCore::Core::CPUState, pf_raw), GPRClass, CTX->GetGPRSize());
+    auto Src2 = _LoadRegister(offsetof(FEXCore::Core::CPUState, af_raw), GPRClass, CTX->GetGPRSize());
     GenerateFlags_SUB(Op, Src2, Src1);
     CalculateDeferredFlags();
   }
@@ -4406,7 +4406,7 @@ OrderedNode* OpDispatchBuilder::LoadGPRRegister(uint32_t GPR, int8_t Size, uint8
   if (Size == -1) {
     Size = GPRSize;
   }
-  OrderedNode* Reg = _LoadRegister(offsetof(FEXCore::Core::CPUState, gregs[GPR]), GPRClass, GPRFixedClass, GPRSize);
+  OrderedNode* Reg = _LoadRegister(offsetof(FEXCore::Core::CPUState, gregs[GPR]), GPRClass, GPRSize);
   if ((!AllowUpperGarbage && (Size != GPRSize)) || Offset != 0) {
     // Extract the subregister if requested.
@@ -4425,7 +4425,7 @@ OrderedNode* OpDispatchBuilder::LoadXMMRegister(uint32_t XMM) {
   const auto VectorOffset =
     CTX->HostFeatures.SupportsAVX ? offsetof(Core::CPUState, xmm.avx.data[XMM][0]) : offsetof(Core::CPUState, xmm.sse.data[XMM][0]);
-  OrderedNode* Reg = _LoadRegister(VectorOffset, FPRClass, FPRFixedClass, VectorSize);
+  OrderedNode* Reg = _LoadRegister(VectorOffset, FPRClass, VectorSize);
   return Reg;
 }
@@ -4442,7 +4442,7 @@ void OpDispatchBuilder::StoreGPRRegister(uint32_t GPR, OrderedNode* const Src, i
     Reg = _Bfi(IR::SizeToOpSize(GPRSize), Size * 8, Offset, Reg, Src);
   }
-  _StoreRegister(Reg, offsetof(FEXCore::Core::CPUState, gregs[GPR]), GPRClass, GPRFixedClass, GPRSize);
+  _StoreRegister(Reg, offsetof(FEXCore::Core::CPUState, gregs[GPR]), GPRClass, GPRSize);
 }
 void OpDispatchBuilder::StoreXMMRegister(uint32_t XMM, OrderedNode* const Src) {
@@ -4450,7 +4450,7 @@ void OpDispatchBuilder::StoreXMMRegister(uint32_t XMM, OrderedNode* const Src) {
   const auto VectorOffset =
     CTX->HostFeatures.SupportsAVX ? offsetof(Core::CPUState, xmm.avx.data[XMM][0]) : offsetof(Core::CPUState, xmm.sse.data[XMM][0]);
-  _StoreRegister(Src, VectorOffset, FPRClass, FPRFixedClass, VectorSize);
+  _StoreRegister(Src, VectorOffset, FPRClass, VectorSize);
 }
 OrderedNode* OpDispatchBuilder::LoadSource(RegisterClassType Class, const X86Tables::DecodedOp& Op,

View File

@@ -1402,9 +1402,9 @@ private:
     if (IsNZCV(BitOffset)) {
       InsertNZCV(BitOffset, Value, ValueOffset, MustMask);
     } else if (BitOffset == FEXCore::X86State::RFLAG_PF_RAW_LOC) {
-      _StoreRegister(Value, offsetof(FEXCore::Core::CPUState, pf_raw), GPRClass, GPRFixedClass, CTX->GetGPRSize());
+      _StoreRegister(Value, offsetof(FEXCore::Core::CPUState, pf_raw), GPRClass, CTX->GetGPRSize());
     } else if (BitOffset == FEXCore::X86State::RFLAG_AF_RAW_LOC) {
-      _StoreRegister(Value, offsetof(FEXCore::Core::CPUState, af_raw), GPRClass, GPRFixedClass, CTX->GetGPRSize());
+      _StoreRegister(Value, offsetof(FEXCore::Core::CPUState, af_raw), GPRClass, CTX->GetGPRSize());
     } else {
       if (ValueOffset || MustMask) {
         Value = _Bfe(OpSize::i32Bit, 1, ValueOffset, Value);
@@ -1459,9 +1459,9 @@ private:
       return _NZCVSelect(OpSize::i32Bit, CondForNZCVBit(BitOffset, Invert), _Constant(1), _Constant(0));
     }
   } else if (BitOffset == FEXCore::X86State::RFLAG_PF_RAW_LOC) {
-    return _LoadRegister(offsetof(FEXCore::Core::CPUState, pf_raw), GPRClass, GPRFixedClass, CTX->GetGPRSize());
+    return _LoadRegister(offsetof(FEXCore::Core::CPUState, pf_raw), GPRClass, CTX->GetGPRSize());
   } else if (BitOffset == FEXCore::X86State::RFLAG_AF_RAW_LOC) {
-    return _LoadRegister(offsetof(FEXCore::Core::CPUState, af_raw), GPRClass, GPRFixedClass, CTX->GetGPRSize());
+    return _LoadRegister(offsetof(FEXCore::Core::CPUState, af_raw), GPRClass, CTX->GetGPRSize());
   } else if (BitOffset == FEXCore::X86State::RFLAG_DF_RAW_LOC) {
     // Recover the sign bit, it is the logical DF value
     return _Lshr(OpSize::i64Bit, _LoadDF(), _Constant(63));

View File

@@ -227,8 +227,7 @@ struct ThunkHandler_impl final : public ThunkHandler {
   const uint8_t GPRSize = CTX->GetGPRSize();
   if (GPRSize == 8) {
-    emit->_StoreRegister(emit->_Constant(Entrypoint), offsetof(Core::CPUState, gregs[X86State::REG_R11]), IR::GPRClass,
-                         IR::GPRFixedClass, GPRSize);
+    emit->_StoreRegister(emit->_Constant(Entrypoint), offsetof(Core::CPUState, gregs[X86State::REG_R11]), IR::GPRClass, GPRSize);
   } else {
     emit->_StoreContext(GPRSize, IR::FPRClass, emit->_VCastFromGPR(8, 8, emit->_Constant(Entrypoint)), offsetof(Core::CPUState, mm[0][0]));
   }

View File

@@ -343,14 +343,14 @@
     }
   },
   "StaticRA": {
-    "SSA = LoadRegister u32:$Offset, RegisterClass:$Class, RegisterClass:$StaticClass, u8:#Size": {
+    "SSA = LoadRegister u32:$Offset, RegisterClass:$Class, u8:#Size": {
       "Desc": ["Loads a value from the static-ra context with offset",
         "Dest = Ctx[Offset]"
       ],
       "DestSize": "Size"
     },
-    "StoreRegister SSA:$Value, u32:$Offset, RegisterClass:$Class, RegisterClass:$StaticClass, u8:#Size": {
+    "StoreRegister SSA:$Value, u32:$Offset, RegisterClass:$Class, u8:#Size": {
       "HasSideEffects": true,
       "Desc": ["Stores a value to the static-ra context with offset",
         "Ctx[Offset] = Value",

View File

@@ -283,7 +283,7 @@ FlagInfo DeadFlagCalculationEliminination::Classify(IROp_Header* IROp) {
   case OP_LOADREGISTER: {
     auto Op = IROp->CW<IR::IROp_LoadRegister>();
-    if (Op->Class != GPRClass || Op->StaticClass != GPRFixedClass) {
+    if (Op->Class != GPRClass) {
      break;
     }
@@ -292,7 +292,7 @@ FlagInfo DeadFlagCalculationEliminination::Classify(IROp_Header* IROp) {
   case OP_STOREREGISTER: {
     auto Op = IROp->CW<IR::IROp_StoreRegister>();
-    if (Op->Class != GPRClass || Op->StaticClass != GPRFixedClass) {
+    if (Op->Class != GPRClass) {
      break;
     }

View File

@@ -465,23 +465,23 @@ void ConstrainedRAPass::OptimizeStaticRegisters(FEXCore::IR::IRListView* IR) {
   // Helpers
   // Is an OP_STOREREGISTER eligible to write directly to the SRA reg?
-  auto IsPreWritable = [this](uint8_t Size, RegisterClassType StaticClass) {
-    LOGMAN_THROW_A_FMT(StaticClass == GPRFixedClass || StaticClass == FPRFixedClass, "Unexpected static class {}", StaticClass);
-    if (StaticClass == GPRFixedClass) {
+  auto IsPreWritable = [this](uint8_t Size, RegisterClassType Class) {
+    LOGMAN_THROW_A_FMT(Class == GPRClass || Class == FPRClass, "Unexpected class {}", Class);
+    if (Class == GPRClass) {
       return Size == 8 || Size == 4;
-    } else if (StaticClass == FPRFixedClass) {
+    } else if (Class == FPRClass) {
       return Size == 16 || (Size == 32 && SupportsAVX);
     }
     return false; // Unknown
   };
   // Is an OP_LOADREGISTER eligible to read directly from the SRA reg?
-  auto IsAliasable = [this](uint8_t Size, RegisterClassType StaticClass, uint32_t Offset) {
-    LOGMAN_THROW_A_FMT(StaticClass == GPRFixedClass || StaticClass == FPRFixedClass, "Unexpected static class {}", StaticClass);
-    if (StaticClass == GPRFixedClass) {
+  auto IsAliasable = [this](uint8_t Size, RegisterClassType Class, uint32_t Offset) {
+    LOGMAN_THROW_A_FMT(Class == GPRClass || Class == FPRClass, "Unexpected class {}", Class);
+    if (Class == GPRClass) {
       // We need more meta info to support not-size-of-reg
       return (Size == 8 || Size == 4) && ((Offset & 7) == 0);
-    } else if (StaticClass == FPRFixedClass) {
+    } else if (Class == FPRClass) {
       // We need more meta info to support not-size-of-reg
       if (Size == 32 && SupportsAVX && (Offset & 31) == 0) {
         return true;
@@ -592,13 +592,13 @@ void ConstrainedRAPass::OptimizeStaticRegisters(FEXCore::IR::IRListView* IR) {
       const auto OpID = Op->Value.ID();
       auto& OpLiveRange = LiveRanges[OpID.Value];
-      if (IsPreWritable(IROp->Size, Op->StaticClass) && OpLiveRange.PrefferedRegister.IsInvalid() && !OpLiveRange.Global) {
+      if (IsPreWritable(IROp->Size, Op->Class) && OpLiveRange.PrefferedRegister.IsInvalid() && !OpLiveRange.Global) {
         // Pre-write and sra-allocate in the defining node - this might be undone if a read before the actual store happens
         SRA_DEBUG("Prewritting ssa{} (Store in ssa{})\n", OpID, Node);
         OpLiveRange.PrefferedRegister = GetRegAndClassFromOffset(Op->Offset);
         OpLiveRange.PreWritten = Node;
-        SetNodeClass(Graph, OpID, Op->StaticClass);
+        SetNodeClass(Graph, OpID, Op->Class == FPRClass ? FPRFixedClass : GPRFixedClass);
       }
     }
   }
@@ -679,7 +679,7 @@ void ConstrainedRAPass::OptimizeStaticRegisters(FEXCore::IR::IRListView* IR) {
       // if not sra-allocated and full size, sra-allocate
       if (!NodeLiveRange.Global && NodeLiveRange.PrefferedRegister.IsInvalid()) {
         // only full size reads can be aliased
-        if (IsAliasable(IROp->Size, Op->StaticClass, Op->Offset)) {
+        if (IsAliasable(IROp->Size, Op->Class, Op->Offset)) {
          // We can only track a single active span.
          // Marking here as written is overly agressive, but
          // there might be write(s) later on the instruction stream
@@ -692,7 +692,7 @@ void ConstrainedRAPass::OptimizeStaticRegisters(FEXCore::IR::IRListView* IR) {
          NodeLiveRange.PrefferedRegister = GetRegAndClassFromOffset(Op->Offset); // 0, 1, and so on
          (*StaticMap) = &NodeLiveRange;
-         SetNodeClass(Graph, Node, Op->StaticClass);
+         SetNodeClass(Graph, Node, Op->Class == FPRClass ? FPRFixedClass : GPRFixedClass);
          SRA_DEBUG("Marking ssa{} as allocated to sra{}\n", Node, -1 /*vreg*/);
        }
      }