mirror of
https://github.com/FEX-Emu/FEX.git
synced 2024-11-27 08:40:32 +00:00
IR: infer SRA static class
no need to stick it in the IR.
Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
This commit is contained in:
parent
74489a4177
commit
b91b0e9d65
@ -3396,8 +3396,8 @@ void OpDispatchBuilder::CMPSOp(OpcodeArgs) {
|
|||||||
auto Src2 = _LoadMem(GPRClass, Size, Dest_RSI, Size);
|
auto Src2 = _LoadMem(GPRClass, Size, Dest_RSI, Size);
|
||||||
|
|
||||||
// We'll calculate PF/AF after the loop, so use them as temporaries here.
|
// We'll calculate PF/AF after the loop, so use them as temporaries here.
|
||||||
_StoreRegister(Src1, offsetof(FEXCore::Core::CPUState, pf_raw), GPRClass, GPRFixedClass, CTX->GetGPRSize());
|
_StoreRegister(Src1, offsetof(FEXCore::Core::CPUState, pf_raw), GPRClass, CTX->GetGPRSize());
|
||||||
_StoreRegister(Src2, offsetof(FEXCore::Core::CPUState, af_raw), GPRClass, GPRFixedClass, CTX->GetGPRSize());
|
_StoreRegister(Src2, offsetof(FEXCore::Core::CPUState, af_raw), GPRClass, CTX->GetGPRSize());
|
||||||
|
|
||||||
OrderedNode* TailCounter = LoadGPRRegister(X86State::REG_RCX);
|
OrderedNode* TailCounter = LoadGPRRegister(X86State::REG_RCX);
|
||||||
|
|
||||||
@ -3436,8 +3436,8 @@ void OpDispatchBuilder::CMPSOp(OpcodeArgs) {
|
|||||||
// Make sure to start a new block after ending this one
|
// Make sure to start a new block after ending this one
|
||||||
{
|
{
|
||||||
// Grab the sources from the last iteration so we can set flags.
|
// Grab the sources from the last iteration so we can set flags.
|
||||||
auto Src1 = _LoadRegister(offsetof(FEXCore::Core::CPUState, pf_raw), GPRClass, GPRFixedClass, CTX->GetGPRSize());
|
auto Src1 = _LoadRegister(offsetof(FEXCore::Core::CPUState, pf_raw), GPRClass, CTX->GetGPRSize());
|
||||||
auto Src2 = _LoadRegister(offsetof(FEXCore::Core::CPUState, af_raw), GPRClass, GPRFixedClass, CTX->GetGPRSize());
|
auto Src2 = _LoadRegister(offsetof(FEXCore::Core::CPUState, af_raw), GPRClass, CTX->GetGPRSize());
|
||||||
GenerateFlags_SUB(Op, Src2, Src1);
|
GenerateFlags_SUB(Op, Src2, Src1);
|
||||||
CalculateDeferredFlags();
|
CalculateDeferredFlags();
|
||||||
}
|
}
|
||||||
@ -4406,7 +4406,7 @@ OrderedNode* OpDispatchBuilder::LoadGPRRegister(uint32_t GPR, int8_t Size, uint8
|
|||||||
if (Size == -1) {
|
if (Size == -1) {
|
||||||
Size = GPRSize;
|
Size = GPRSize;
|
||||||
}
|
}
|
||||||
OrderedNode* Reg = _LoadRegister(offsetof(FEXCore::Core::CPUState, gregs[GPR]), GPRClass, GPRFixedClass, GPRSize);
|
OrderedNode* Reg = _LoadRegister(offsetof(FEXCore::Core::CPUState, gregs[GPR]), GPRClass, GPRSize);
|
||||||
|
|
||||||
if ((!AllowUpperGarbage && (Size != GPRSize)) || Offset != 0) {
|
if ((!AllowUpperGarbage && (Size != GPRSize)) || Offset != 0) {
|
||||||
// Extract the subregister if requested.
|
// Extract the subregister if requested.
|
||||||
@ -4425,7 +4425,7 @@ OrderedNode* OpDispatchBuilder::LoadXMMRegister(uint32_t XMM) {
|
|||||||
const auto VectorOffset =
|
const auto VectorOffset =
|
||||||
CTX->HostFeatures.SupportsAVX ? offsetof(Core::CPUState, xmm.avx.data[XMM][0]) : offsetof(Core::CPUState, xmm.sse.data[XMM][0]);
|
CTX->HostFeatures.SupportsAVX ? offsetof(Core::CPUState, xmm.avx.data[XMM][0]) : offsetof(Core::CPUState, xmm.sse.data[XMM][0]);
|
||||||
|
|
||||||
OrderedNode* Reg = _LoadRegister(VectorOffset, FPRClass, FPRFixedClass, VectorSize);
|
OrderedNode* Reg = _LoadRegister(VectorOffset, FPRClass, VectorSize);
|
||||||
return Reg;
|
return Reg;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -4442,7 +4442,7 @@ void OpDispatchBuilder::StoreGPRRegister(uint32_t GPR, OrderedNode* const Src, i
|
|||||||
Reg = _Bfi(IR::SizeToOpSize(GPRSize), Size * 8, Offset, Reg, Src);
|
Reg = _Bfi(IR::SizeToOpSize(GPRSize), Size * 8, Offset, Reg, Src);
|
||||||
}
|
}
|
||||||
|
|
||||||
_StoreRegister(Reg, offsetof(FEXCore::Core::CPUState, gregs[GPR]), GPRClass, GPRFixedClass, GPRSize);
|
_StoreRegister(Reg, offsetof(FEXCore::Core::CPUState, gregs[GPR]), GPRClass, GPRSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
void OpDispatchBuilder::StoreXMMRegister(uint32_t XMM, OrderedNode* const Src) {
|
void OpDispatchBuilder::StoreXMMRegister(uint32_t XMM, OrderedNode* const Src) {
|
||||||
@ -4450,7 +4450,7 @@ void OpDispatchBuilder::StoreXMMRegister(uint32_t XMM, OrderedNode* const Src) {
|
|||||||
const auto VectorOffset =
|
const auto VectorOffset =
|
||||||
CTX->HostFeatures.SupportsAVX ? offsetof(Core::CPUState, xmm.avx.data[XMM][0]) : offsetof(Core::CPUState, xmm.sse.data[XMM][0]);
|
CTX->HostFeatures.SupportsAVX ? offsetof(Core::CPUState, xmm.avx.data[XMM][0]) : offsetof(Core::CPUState, xmm.sse.data[XMM][0]);
|
||||||
|
|
||||||
_StoreRegister(Src, VectorOffset, FPRClass, FPRFixedClass, VectorSize);
|
_StoreRegister(Src, VectorOffset, FPRClass, VectorSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
OrderedNode* OpDispatchBuilder::LoadSource(RegisterClassType Class, const X86Tables::DecodedOp& Op,
|
OrderedNode* OpDispatchBuilder::LoadSource(RegisterClassType Class, const X86Tables::DecodedOp& Op,
|
||||||
|
@ -1402,9 +1402,9 @@ private:
|
|||||||
if (IsNZCV(BitOffset)) {
|
if (IsNZCV(BitOffset)) {
|
||||||
InsertNZCV(BitOffset, Value, ValueOffset, MustMask);
|
InsertNZCV(BitOffset, Value, ValueOffset, MustMask);
|
||||||
} else if (BitOffset == FEXCore::X86State::RFLAG_PF_RAW_LOC) {
|
} else if (BitOffset == FEXCore::X86State::RFLAG_PF_RAW_LOC) {
|
||||||
_StoreRegister(Value, offsetof(FEXCore::Core::CPUState, pf_raw), GPRClass, GPRFixedClass, CTX->GetGPRSize());
|
_StoreRegister(Value, offsetof(FEXCore::Core::CPUState, pf_raw), GPRClass, CTX->GetGPRSize());
|
||||||
} else if (BitOffset == FEXCore::X86State::RFLAG_AF_RAW_LOC) {
|
} else if (BitOffset == FEXCore::X86State::RFLAG_AF_RAW_LOC) {
|
||||||
_StoreRegister(Value, offsetof(FEXCore::Core::CPUState, af_raw), GPRClass, GPRFixedClass, CTX->GetGPRSize());
|
_StoreRegister(Value, offsetof(FEXCore::Core::CPUState, af_raw), GPRClass, CTX->GetGPRSize());
|
||||||
} else {
|
} else {
|
||||||
if (ValueOffset || MustMask) {
|
if (ValueOffset || MustMask) {
|
||||||
Value = _Bfe(OpSize::i32Bit, 1, ValueOffset, Value);
|
Value = _Bfe(OpSize::i32Bit, 1, ValueOffset, Value);
|
||||||
@ -1459,9 +1459,9 @@ private:
|
|||||||
return _NZCVSelect(OpSize::i32Bit, CondForNZCVBit(BitOffset, Invert), _Constant(1), _Constant(0));
|
return _NZCVSelect(OpSize::i32Bit, CondForNZCVBit(BitOffset, Invert), _Constant(1), _Constant(0));
|
||||||
}
|
}
|
||||||
} else if (BitOffset == FEXCore::X86State::RFLAG_PF_RAW_LOC) {
|
} else if (BitOffset == FEXCore::X86State::RFLAG_PF_RAW_LOC) {
|
||||||
return _LoadRegister(offsetof(FEXCore::Core::CPUState, pf_raw), GPRClass, GPRFixedClass, CTX->GetGPRSize());
|
return _LoadRegister(offsetof(FEXCore::Core::CPUState, pf_raw), GPRClass, CTX->GetGPRSize());
|
||||||
} else if (BitOffset == FEXCore::X86State::RFLAG_AF_RAW_LOC) {
|
} else if (BitOffset == FEXCore::X86State::RFLAG_AF_RAW_LOC) {
|
||||||
return _LoadRegister(offsetof(FEXCore::Core::CPUState, af_raw), GPRClass, GPRFixedClass, CTX->GetGPRSize());
|
return _LoadRegister(offsetof(FEXCore::Core::CPUState, af_raw), GPRClass, CTX->GetGPRSize());
|
||||||
} else if (BitOffset == FEXCore::X86State::RFLAG_DF_RAW_LOC) {
|
} else if (BitOffset == FEXCore::X86State::RFLAG_DF_RAW_LOC) {
|
||||||
// Recover the sign bit, it is the logical DF value
|
// Recover the sign bit, it is the logical DF value
|
||||||
return _Lshr(OpSize::i64Bit, _LoadDF(), _Constant(63));
|
return _Lshr(OpSize::i64Bit, _LoadDF(), _Constant(63));
|
||||||
|
@ -227,8 +227,7 @@ struct ThunkHandler_impl final : public ThunkHandler {
|
|||||||
const uint8_t GPRSize = CTX->GetGPRSize();
|
const uint8_t GPRSize = CTX->GetGPRSize();
|
||||||
|
|
||||||
if (GPRSize == 8) {
|
if (GPRSize == 8) {
|
||||||
emit->_StoreRegister(emit->_Constant(Entrypoint), offsetof(Core::CPUState, gregs[X86State::REG_R11]), IR::GPRClass,
|
emit->_StoreRegister(emit->_Constant(Entrypoint), offsetof(Core::CPUState, gregs[X86State::REG_R11]), IR::GPRClass, GPRSize);
|
||||||
IR::GPRFixedClass, GPRSize);
|
|
||||||
} else {
|
} else {
|
||||||
emit->_StoreContext(GPRSize, IR::FPRClass, emit->_VCastFromGPR(8, 8, emit->_Constant(Entrypoint)), offsetof(Core::CPUState, mm[0][0]));
|
emit->_StoreContext(GPRSize, IR::FPRClass, emit->_VCastFromGPR(8, 8, emit->_Constant(Entrypoint)), offsetof(Core::CPUState, mm[0][0]));
|
||||||
}
|
}
|
||||||
|
@ -343,14 +343,14 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"StaticRA": {
|
"StaticRA": {
|
||||||
"SSA = LoadRegister u32:$Offset, RegisterClass:$Class, RegisterClass:$StaticClass, u8:#Size": {
|
"SSA = LoadRegister u32:$Offset, RegisterClass:$Class, u8:#Size": {
|
||||||
"Desc": ["Loads a value from the static-ra context with offset",
|
"Desc": ["Loads a value from the static-ra context with offset",
|
||||||
"Dest = Ctx[Offset]"
|
"Dest = Ctx[Offset]"
|
||||||
],
|
],
|
||||||
"DestSize": "Size"
|
"DestSize": "Size"
|
||||||
},
|
},
|
||||||
|
|
||||||
"StoreRegister SSA:$Value, u32:$Offset, RegisterClass:$Class, RegisterClass:$StaticClass, u8:#Size": {
|
"StoreRegister SSA:$Value, u32:$Offset, RegisterClass:$Class, u8:#Size": {
|
||||||
"HasSideEffects": true,
|
"HasSideEffects": true,
|
||||||
"Desc": ["Stores a value to the static-ra context with offset",
|
"Desc": ["Stores a value to the static-ra context with offset",
|
||||||
"Ctx[Offset] = Value",
|
"Ctx[Offset] = Value",
|
||||||
|
@ -283,7 +283,7 @@ FlagInfo DeadFlagCalculationEliminination::Classify(IROp_Header* IROp) {
|
|||||||
|
|
||||||
case OP_LOADREGISTER: {
|
case OP_LOADREGISTER: {
|
||||||
auto Op = IROp->CW<IR::IROp_LoadRegister>();
|
auto Op = IROp->CW<IR::IROp_LoadRegister>();
|
||||||
if (Op->Class != GPRClass || Op->StaticClass != GPRFixedClass) {
|
if (Op->Class != GPRClass) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -292,7 +292,7 @@ FlagInfo DeadFlagCalculationEliminination::Classify(IROp_Header* IROp) {
|
|||||||
|
|
||||||
case OP_STOREREGISTER: {
|
case OP_STOREREGISTER: {
|
||||||
auto Op = IROp->CW<IR::IROp_StoreRegister>();
|
auto Op = IROp->CW<IR::IROp_StoreRegister>();
|
||||||
if (Op->Class != GPRClass || Op->StaticClass != GPRFixedClass) {
|
if (Op->Class != GPRClass) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -465,23 +465,23 @@ void ConstrainedRAPass::OptimizeStaticRegisters(FEXCore::IR::IRListView* IR) {
|
|||||||
// Helpers
|
// Helpers
|
||||||
|
|
||||||
// Is an OP_STOREREGISTER eligible to write directly to the SRA reg?
|
// Is an OP_STOREREGISTER eligible to write directly to the SRA reg?
|
||||||
auto IsPreWritable = [this](uint8_t Size, RegisterClassType StaticClass) {
|
auto IsPreWritable = [this](uint8_t Size, RegisterClassType Class) {
|
||||||
LOGMAN_THROW_A_FMT(StaticClass == GPRFixedClass || StaticClass == FPRFixedClass, "Unexpected static class {}", StaticClass);
|
LOGMAN_THROW_A_FMT(Class == GPRClass || Class == FPRClass, "Unexpected class {}", Class);
|
||||||
if (StaticClass == GPRFixedClass) {
|
if (Class == GPRClass) {
|
||||||
return Size == 8 || Size == 4;
|
return Size == 8 || Size == 4;
|
||||||
} else if (StaticClass == FPRFixedClass) {
|
} else if (Class == FPRClass) {
|
||||||
return Size == 16 || (Size == 32 && SupportsAVX);
|
return Size == 16 || (Size == 32 && SupportsAVX);
|
||||||
}
|
}
|
||||||
return false; // Unknown
|
return false; // Unknown
|
||||||
};
|
};
|
||||||
|
|
||||||
// Is an OP_LOADREGISTER eligible to read directly from the SRA reg?
|
// Is an OP_LOADREGISTER eligible to read directly from the SRA reg?
|
||||||
auto IsAliasable = [this](uint8_t Size, RegisterClassType StaticClass, uint32_t Offset) {
|
auto IsAliasable = [this](uint8_t Size, RegisterClassType Class, uint32_t Offset) {
|
||||||
LOGMAN_THROW_A_FMT(StaticClass == GPRFixedClass || StaticClass == FPRFixedClass, "Unexpected static class {}", StaticClass);
|
LOGMAN_THROW_A_FMT(Class == GPRClass || Class == FPRClass, "Unexpected class {}", Class);
|
||||||
if (StaticClass == GPRFixedClass) {
|
if (Class == GPRClass) {
|
||||||
// We need more meta info to support not-size-of-reg
|
// We need more meta info to support not-size-of-reg
|
||||||
return (Size == 8 || Size == 4) && ((Offset & 7) == 0);
|
return (Size == 8 || Size == 4) && ((Offset & 7) == 0);
|
||||||
} else if (StaticClass == FPRFixedClass) {
|
} else if (Class == FPRClass) {
|
||||||
// We need more meta info to support not-size-of-reg
|
// We need more meta info to support not-size-of-reg
|
||||||
if (Size == 32 && SupportsAVX && (Offset & 31) == 0) {
|
if (Size == 32 && SupportsAVX && (Offset & 31) == 0) {
|
||||||
return true;
|
return true;
|
||||||
@ -592,13 +592,13 @@ void ConstrainedRAPass::OptimizeStaticRegisters(FEXCore::IR::IRListView* IR) {
|
|||||||
const auto OpID = Op->Value.ID();
|
const auto OpID = Op->Value.ID();
|
||||||
auto& OpLiveRange = LiveRanges[OpID.Value];
|
auto& OpLiveRange = LiveRanges[OpID.Value];
|
||||||
|
|
||||||
if (IsPreWritable(IROp->Size, Op->StaticClass) && OpLiveRange.PrefferedRegister.IsInvalid() && !OpLiveRange.Global) {
|
if (IsPreWritable(IROp->Size, Op->Class) && OpLiveRange.PrefferedRegister.IsInvalid() && !OpLiveRange.Global) {
|
||||||
|
|
||||||
// Pre-write and sra-allocate in the defining node - this might be undone if a read before the actual store happens
|
// Pre-write and sra-allocate in the defining node - this might be undone if a read before the actual store happens
|
||||||
SRA_DEBUG("Prewritting ssa{} (Store in ssa{})\n", OpID, Node);
|
SRA_DEBUG("Prewritting ssa{} (Store in ssa{})\n", OpID, Node);
|
||||||
OpLiveRange.PrefferedRegister = GetRegAndClassFromOffset(Op->Offset);
|
OpLiveRange.PrefferedRegister = GetRegAndClassFromOffset(Op->Offset);
|
||||||
OpLiveRange.PreWritten = Node;
|
OpLiveRange.PreWritten = Node;
|
||||||
SetNodeClass(Graph, OpID, Op->StaticClass);
|
SetNodeClass(Graph, OpID, Op->Class == FPRClass ? FPRFixedClass : GPRFixedClass);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -679,7 +679,7 @@ void ConstrainedRAPass::OptimizeStaticRegisters(FEXCore::IR::IRListView* IR) {
|
|||||||
// if not sra-allocated and full size, sra-allocate
|
// if not sra-allocated and full size, sra-allocate
|
||||||
if (!NodeLiveRange.Global && NodeLiveRange.PrefferedRegister.IsInvalid()) {
|
if (!NodeLiveRange.Global && NodeLiveRange.PrefferedRegister.IsInvalid()) {
|
||||||
// only full size reads can be aliased
|
// only full size reads can be aliased
|
||||||
if (IsAliasable(IROp->Size, Op->StaticClass, Op->Offset)) {
|
if (IsAliasable(IROp->Size, Op->Class, Op->Offset)) {
|
||||||
// We can only track a single active span.
|
// We can only track a single active span.
|
||||||
// Marking here as written is overly agressive, but
|
// Marking here as written is overly agressive, but
|
||||||
// there might be write(s) later on the instruction stream
|
// there might be write(s) later on the instruction stream
|
||||||
@ -692,7 +692,7 @@ void ConstrainedRAPass::OptimizeStaticRegisters(FEXCore::IR::IRListView* IR) {
|
|||||||
|
|
||||||
NodeLiveRange.PrefferedRegister = GetRegAndClassFromOffset(Op->Offset); // 0, 1, and so on
|
NodeLiveRange.PrefferedRegister = GetRegAndClassFromOffset(Op->Offset); // 0, 1, and so on
|
||||||
(*StaticMap) = &NodeLiveRange;
|
(*StaticMap) = &NodeLiveRange;
|
||||||
SetNodeClass(Graph, Node, Op->StaticClass);
|
SetNodeClass(Graph, Node, Op->Class == FPRClass ? FPRFixedClass : GPRFixedClass);
|
||||||
SRA_DEBUG("Marking ssa{} as allocated to sra{}\n", Node, -1 /*vreg*/);
|
SRA_DEBUG("Marking ssa{} as allocated to sra{}\n", Node, -1 /*vreg*/);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user