Merge pull request #3616 from alyssarosenzweig/sra/simplify-1

SRA controlled burn
Alyssa Rosenzweig 2024-05-08 14:10:48 -04:00 committed by GitHub
commit 47242dc190
15 changed files with 166 additions and 681 deletions

View File

@ -86,170 +86,33 @@ DEF_OP(LoadRegister) {
const auto OpSize = IROp->Size;
if (Op->Class == IR::GPRClass) {
const auto regId = Op->Offset == offsetof(Core::CpuStateFrame, State.pf_raw) ?
(StaticRegisters.size() - 2) :
Op->Offset == offsetof(Core::CpuStateFrame, State.af_raw) ?
(StaticRegisters.size() - 1) :
(Op->Offset - offsetof(Core::CpuStateFrame, State.gregs[0])) / Core::CPUState::GPR_REG_SIZE;
unsigned Reg = Op->Reg == Core::CPUState::PF_AS_GREG ? (StaticRegisters.size() - 2) :
Op->Reg == Core::CPUState::AF_AS_GREG ? (StaticRegisters.size() - 1) :
Op->Reg;
const auto regOffs = Op->Offset & 7;
LOGMAN_THROW_A_FMT(Reg < StaticRegisters.size(), "out of range reg");
const auto reg = StaticRegisters[Reg];
LOGMAN_THROW_A_FMT(regId < StaticRegisters.size(), "out of range regId");
const auto reg = StaticRegisters[regId];
switch (OpSize) {
case 4:
LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs");
if (GetReg(Node).Idx() != reg.Idx()) {
if (GetReg(Node).Idx() != reg.Idx()) {
if (OpSize == 4) {
mov(GetReg(Node).W(), reg.W());
}
break;
case 8:
LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs");
if (GetReg(Node).Idx() != reg.Idx()) {
} else {
mov(GetReg(Node).X(), reg.X());
}
break;
default: LOGMAN_MSG_A_FMT("Unhandled LoadRegister GPR size: {}", OpSize); break;
}
} else if (Op->Class == IR::FPRClass) {
const auto regSize = HostSupportsSVE256 ? Core::CPUState::XMM_AVX_REG_SIZE : Core::CPUState::XMM_SSE_REG_SIZE;
const auto regId = (Op->Offset - offsetof(Core::CpuStateFrame, State.xmm.avx.data[0][0])) / regSize;
LOGMAN_THROW_A_FMT(Op->Reg < StaticFPRegisters.size(), "out of range reg");
LOGMAN_THROW_A_FMT(OpSize == regSize, "expected sized");
LOGMAN_THROW_A_FMT(regId < StaticFPRegisters.size(), "out of range regId");
const auto guest = StaticFPRegisters[regId];
const auto guest = StaticFPRegisters[Op->Reg];
const auto host = GetVReg(Node);
if (HostSupportsSVE256) {
const auto regOffs = Op->Offset & 31;
ARMEmitter::SingleUseForwardLabel DataLocation;
const auto LoadPredicate = [this, &DataLocation] {
const auto Predicate = ARMEmitter::PReg::p0;
adr(TMP1, &DataLocation);
ldr(Predicate, TMP1);
return Predicate.Merging();
};
const auto EmitData = [this, &DataLocation](uint32_t Value) {
ARMEmitter::SingleUseForwardLabel PastConstant;
b(&PastConstant);
Bind(&DataLocation);
dc32(Value);
Bind(&PastConstant);
};
switch (OpSize) {
case 1: {
LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs: {}", regOffs);
dup(ARMEmitter::ScalarRegSize::i8Bit, host, guest, 0);
break;
}
case 2: {
LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs: {}", regOffs);
fmov(host.H(), guest.H());
break;
}
case 4: {
LOGMAN_THROW_AA_FMT((regOffs & 3) == 0, "unexpected regOffs: {}", regOffs);
if (regOffs == 0) {
if (host.Idx() != guest.Idx()) {
fmov(host.S(), guest.S());
}
} else {
const auto Predicate = LoadPredicate();
dup(FEXCore::ARMEmitter::SubRegSize::i32Bit, VTMP1.Z(), host.Z(), 0);
mov(FEXCore::ARMEmitter::SubRegSize::i32Bit, guest.Z(), Predicate, VTMP1.Z());
EmitData(1U << regOffs);
}
break;
}
case 8: {
LOGMAN_THROW_AA_FMT((regOffs & 7) == 0, "unexpected regOffs: {}", regOffs);
if (regOffs == 0) {
if (host.Idx() != guest.Idx()) {
dup(ARMEmitter::ScalarRegSize::i64Bit, host, guest, 0);
}
} else {
const auto Predicate = LoadPredicate();
dup(FEXCore::ARMEmitter::SubRegSize::i64Bit, VTMP1.Z(), host.Z(), 0);
mov(FEXCore::ARMEmitter::SubRegSize::i64Bit, guest.Z(), Predicate, VTMP1.Z());
EmitData(1U << regOffs);
}
break;
}
case 16: {
LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs: {}", regOffs);
if (host.Idx() != guest.Idx()) {
mov(host.Q(), guest.Q());
}
break;
}
case 32: {
LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs: {}", regOffs);
if (host.Idx() != guest.Idx()) {
mov(ARMEmitter::SubRegSize::i64Bit, host.Z(), PRED_TMP_32B.Merging(), guest.Z());
}
break;
}
default: LOGMAN_MSG_A_FMT("Unhandled LoadRegister FPR size: {}", OpSize); break;
}
} else {
const auto regOffs = Op->Offset & 15;
switch (OpSize) {
case 1:
LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs: {}", regOffs);
dup(ARMEmitter::ScalarRegSize::i8Bit, host, guest, 0);
break;
case 2:
LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs: {}", regOffs);
fmov(host.H(), guest.H());
break;
case 4:
LOGMAN_THROW_AA_FMT((regOffs & 3) == 0, "unexpected regOffs: {}", regOffs);
if (regOffs == 0) {
if (host.Idx() != guest.Idx()) {
fmov(host.S(), guest.S());
}
} else {
ins(ARMEmitter::SubRegSize::i32Bit, host, 0, guest, regOffs / 4);
}
break;
case 8:
LOGMAN_THROW_AA_FMT((regOffs & 7) == 0, "unexpected regOffs: {}", regOffs);
if (regOffs == 0) {
if (host.Idx() != guest.Idx()) {
dup(ARMEmitter::ScalarRegSize::i64Bit, host, guest, 0);
}
} else {
ins(ARMEmitter::SubRegSize::i64Bit, host, 0, guest, regOffs / 8);
}
break;
case 16:
LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs: {}", regOffs);
if (host.Idx() != guest.Idx()) {
mov(host.Q(), guest.Q());
}
break;
default: LOGMAN_MSG_A_FMT("Unhandled LoadRegister FPR size: {}", OpSize); break;
if (host.Idx() != guest.Idx()) {
if (HostSupportsSVE256) {
mov(ARMEmitter::SubRegSize::i64Bit, host.Z(), PRED_TMP_32B.Merging(), guest.Z());
} else {
mov(host.Q(), guest.Q());
}
}
} else {
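Reassembled from the added lines in the hunk above, the GPR path of LoadRegister now reduces to roughly the following (a sketch of the post-commit code for readability, not a verbatim file excerpt; PF/AF map onto the last two static registers):

if (Op->Class == IR::GPRClass) {
  unsigned Reg = Op->Reg == Core::CPUState::PF_AS_GREG ? (StaticRegisters.size() - 2) :
                 Op->Reg == Core::CPUState::AF_AS_GREG ? (StaticRegisters.size() - 1) :
                 Op->Reg;
  LOGMAN_THROW_A_FMT(Reg < StaticRegisters.size(), "out of range reg");
  const auto reg = StaticRegisters[Reg];
  if (GetReg(Node).Idx() != reg.Idx()) {
    if (OpSize == 4) {
      mov(GetReg(Node).W(), reg.W());
    } else {
      mov(GetReg(Node).X(), reg.X());
    }
  }
}

The FPR path collapses the same way: the per-offset predicate machinery is gone and only the whole-register move (SVE256 or Adv. SIMD) remains.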
@ -261,171 +124,33 @@ DEF_OP(StoreRegister) {
const auto Op = IROp->C<IR::IROp_StoreRegister>();
const auto OpSize = IROp->Size;
if (Op->Class == IR::GPRClass) {
const auto regOffs = Op->Offset & 7;
unsigned Reg = Op->Reg == Core::CPUState::PF_AS_GREG ? (StaticRegisters.size() - 2) :
Op->Reg == Core::CPUState::AF_AS_GREG ? (StaticRegisters.size() - 1) :
Op->Reg;
const auto regId = Op->Offset == offsetof(Core::CpuStateFrame, State.pf_raw) ?
(StaticRegisters.size() - 2) :
Op->Offset == offsetof(Core::CpuStateFrame, State.af_raw) ?
(StaticRegisters.size() - 1) :
(Op->Offset - offsetof(Core::CpuStateFrame, State.gregs[0])) / Core::CPUState::GPR_REG_SIZE;
LOGMAN_THROW_A_FMT(regId < StaticRegisters.size(), "out of range regId");
const auto reg = StaticRegisters[regId];
LOGMAN_THROW_A_FMT(Reg < StaticRegisters.size(), "out of range reg");
const auto reg = StaticRegisters[Reg];
const auto Src = GetReg(Op->Value.ID());
switch (OpSize) {
case 4:
LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs");
if (Src.Idx() != reg.Idx()) {
mov(ARMEmitter::Size::i32Bit, reg, Src);
}
break;
case 8:
LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs");
if (Src.Idx() != reg.Idx()) {
mov(ARMEmitter::Size::i64Bit, reg, Src);
}
break;
default: LOGMAN_MSG_A_FMT("Unhandled StoreRegister GPR size: {}", OpSize); break;
if (Src.Idx() != reg.Idx()) {
// Always use 64-bit, it's faster. Upper bits ignored for 32-bit mode.
mov(ARMEmitter::Size::i64Bit, reg, Src);
}
} else if (Op->Class == IR::FPRClass) {
const auto regSize = HostSupportsSVE256 ? Core::CPUState::XMM_AVX_REG_SIZE : Core::CPUState::XMM_SSE_REG_SIZE;
const auto regId = (Op->Offset - offsetof(Core::CpuStateFrame, State.xmm.avx.data[0][0])) / regSize;
LOGMAN_THROW_A_FMT(Op->Reg < StaticFPRegisters.size(), "reg out of range");
LOGMAN_THROW_A_FMT(OpSize == regSize, "expected sized");
LOGMAN_THROW_A_FMT(regId < StaticFPRegisters.size(), "regId out of range");
const auto guest = StaticFPRegisters[regId];
const auto guest = StaticFPRegisters[Op->Reg];
const auto host = GetVReg(Op->Value.ID());
if (HostSupportsSVE256) {
// 256-bit capable hardware allows us to expand the allowed
// offsets used, however we cannot use Adv. SIMD's INS instruction
// at all, since it will zero out the upper lanes of the 256-bit SVE
// vectors, so we'll need to set up a proper predicate for performing
// the insert.
const auto regOffs = Op->Offset & 31;
// Compartmentalized setting up of the predicate for the cases that need it.
ARMEmitter::SingleUseForwardLabel DataLocation;
const auto LoadPredicate = [this, &DataLocation] {
const auto Predicate = ARMEmitter::PReg::p0;
adr(TMP1, &DataLocation);
ldr(Predicate, TMP1);
return Predicate.Merging();
};
// Emits the predicate data and provides the necessary jump to go around the
// emitted data instead of trying to execute it. Place at end of necessary code.
// It's helpful to treat LoadPredicate and EmitData as a prologue and epilogue
// respectively.
const auto EmitData = [this, &DataLocation](uint32_t Data) {
ARMEmitter::SingleUseForwardLabel PastConstant;
b(&PastConstant);
Bind(&DataLocation);
dc32(Data);
Bind(&PastConstant);
};
switch (OpSize) {
case 1: {
LOGMAN_THROW_AA_FMT(regOffs <= 31, "unexpected reg index: {}", regOffs);
const auto Predicate = LoadPredicate();
dup(ARMEmitter::SubRegSize::i8Bit, VTMP1.Z(), host.Z(), 0);
mov(ARMEmitter::SubRegSize::i8Bit, guest.Z(), Predicate, VTMP1.Z());
EmitData(1U << regOffs);
break;
}
case 2: {
LOGMAN_THROW_AA_FMT((regOffs / 2) <= 15, "unexpected reg index: {}", regOffs / 2);
const auto Predicate = LoadPredicate();
dup(ARMEmitter::SubRegSize::i16Bit, VTMP1.Z(), host.Z(), 0);
mov(ARMEmitter::SubRegSize::i16Bit, guest.Z(), Predicate, VTMP1.Z());
EmitData(1U << regOffs);
break;
}
case 4: {
LOGMAN_THROW_AA_FMT((regOffs / 4) <= 7, "unexpected reg index: {}", regOffs / 4);
const auto Predicate = LoadPredicate();
dup(ARMEmitter::SubRegSize::i32Bit, VTMP1.Z(), host.Z(), 0);
mov(ARMEmitter::SubRegSize::i32Bit, guest.Z(), Predicate, VTMP1.Z());
EmitData(1U << regOffs);
break;
}
case 8: {
LOGMAN_THROW_AA_FMT((regOffs / 8) <= 3, "unexpected reg index: {}", regOffs / 8);
const auto Predicate = LoadPredicate();
dup(ARMEmitter::SubRegSize::i64Bit, VTMP1.Z(), host.Z(), 0);
mov(ARMEmitter::SubRegSize::i64Bit, guest.Z(), Predicate, VTMP1.Z());
EmitData(1U << regOffs);
break;
}
case 16: {
LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs: {}", regOffs);
if (guest.Idx() != host.Idx()) {
mov(guest.Q(), host.Q());
}
break;
}
case 32: {
LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs: {}", regOffs);
if (guest.Idx() != host.Idx()) {
mov(ARMEmitter::SubRegSize::i64Bit, guest.Z(), PRED_TMP_32B.Merging(), host.Z());
}
break;
}
default: LOGMAN_MSG_A_FMT("Unhandled StoreRegister FPR size: {}", OpSize); break;
}
} else {
const auto regOffs = Op->Offset & 15;
switch (OpSize) {
case 1: ins(ARMEmitter::SubRegSize::i8Bit, guest, regOffs, host, 0); break;
case 2:
LOGMAN_THROW_AA_FMT((regOffs & 1) == 0, "unexpected regOffs: {}", regOffs);
ins(ARMEmitter::SubRegSize::i16Bit, guest, regOffs / 2, host, 0);
break;
case 4:
LOGMAN_THROW_AA_FMT((regOffs & 3) == 0, "unexpected regOffs: {}", regOffs);
// XXX: This had a bug with insert of size 16bit
ins(ARMEmitter::SubRegSize::i32Bit, guest, regOffs / 4, host, 0);
break;
case 8:
LOGMAN_THROW_AA_FMT((regOffs & 7) == 0, "unexpected regOffs: {}", regOffs);
// XXX: This had a bug with insert of size 16bit
ins(ARMEmitter::SubRegSize::i64Bit, guest, regOffs / 8, host, 0);
break;
case 16:
LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs: {}", regOffs);
if (guest.Idx() != host.Idx()) {
mov(guest.Q(), host.Q());
}
break;
default: LOGMAN_MSG_A_FMT("Unhandled StoreRegister FPR size: {}", OpSize); break;
if (guest.Idx() != host.Idx()) {
if (HostSupportsSVE256) {
mov(ARMEmitter::SubRegSize::i64Bit, guest.Z(), PRED_TMP_32B.Merging(), host.Z());
} else {
mov(guest.Q(), host.Q());
}
}
} else {
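For symmetry, the GPR path of StoreRegister reassembled from the added lines above (again a sketch of the resulting code, not a verbatim excerpt):

if (Op->Class == IR::GPRClass) {
  unsigned Reg = Op->Reg == Core::CPUState::PF_AS_GREG ? (StaticRegisters.size() - 2) :
                 Op->Reg == Core::CPUState::AF_AS_GREG ? (StaticRegisters.size() - 1) :
                 Op->Reg;
  LOGMAN_THROW_A_FMT(Reg < StaticRegisters.size(), "out of range reg");
  const auto reg = StaticRegisters[Reg];
  const auto Src = GetReg(Op->Value.ID());
  if (Src.Idx() != reg.Idx()) {
    // Always use 64-bit, it's faster. Upper bits ignored for 32-bit mode.
    mov(ARMEmitter::Size::i64Bit, reg, Src);
  }
}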

View File

@ -3396,8 +3396,8 @@ void OpDispatchBuilder::CMPSOp(OpcodeArgs) {
auto Src2 = _LoadMem(GPRClass, Size, Dest_RSI, Size);
// We'll calculate PF/AF after the loop, so use them as temporaries here.
_StoreRegister(Src1, false, offsetof(FEXCore::Core::CPUState, pf_raw), GPRClass, GPRFixedClass, CTX->GetGPRSize());
_StoreRegister(Src2, false, offsetof(FEXCore::Core::CPUState, af_raw), GPRClass, GPRFixedClass, CTX->GetGPRSize());
_StoreRegister(Src1, Core::CPUState::PF_AS_GREG, GPRClass, CTX->GetGPRSize());
_StoreRegister(Src2, Core::CPUState::AF_AS_GREG, GPRClass, CTX->GetGPRSize());
OrderedNode* TailCounter = LoadGPRRegister(X86State::REG_RCX);
@ -3436,8 +3436,8 @@ void OpDispatchBuilder::CMPSOp(OpcodeArgs) {
// Make sure to start a new block after ending this one
{
// Grab the sources from the last iteration so we can set flags.
auto Src1 = _LoadRegister(false, offsetof(FEXCore::Core::CPUState, pf_raw), GPRClass, GPRFixedClass, CTX->GetGPRSize());
auto Src2 = _LoadRegister(false, offsetof(FEXCore::Core::CPUState, af_raw), GPRClass, GPRFixedClass, CTX->GetGPRSize());
auto Src1 = _LoadRegister(Core::CPUState::PF_AS_GREG, GPRClass, CTX->GetGPRSize());
auto Src2 = _LoadRegister(Core::CPUState::AF_AS_GREG, GPRClass, CTX->GetGPRSize());
GenerateFlags_SUB(Op, Src2, Src1);
CalculateDeferredFlags();
}
@ -4406,7 +4406,7 @@ OrderedNode* OpDispatchBuilder::LoadGPRRegister(uint32_t GPR, int8_t Size, uint8
if (Size == -1) {
Size = GPRSize;
}
OrderedNode* Reg = _LoadRegister(false, offsetof(FEXCore::Core::CPUState, gregs[GPR]), GPRClass, GPRFixedClass, GPRSize);
OrderedNode* Reg = _LoadRegister(GPR, GPRClass, GPRSize);
if ((!AllowUpperGarbage && (Size != GPRSize)) || Offset != 0) {
// Extract the subregister if requested.
@ -4422,11 +4422,7 @@ OrderedNode* OpDispatchBuilder::LoadGPRRegister(uint32_t GPR, int8_t Size, uint8
OrderedNode* OpDispatchBuilder::LoadXMMRegister(uint32_t XMM) {
const auto VectorSize = CTX->HostFeatures.SupportsAVX ? 32 : 16;
const auto VectorOffset =
CTX->HostFeatures.SupportsAVX ? offsetof(Core::CPUState, xmm.avx.data[XMM][0]) : offsetof(Core::CPUState, xmm.sse.data[XMM][0]);
OrderedNode* Reg = _LoadRegister(false, VectorOffset, FPRClass, FPRFixedClass, VectorSize);
return Reg;
return _LoadRegister(XMM, FPRClass, VectorSize);
}
void OpDispatchBuilder::StoreGPRRegister(uint32_t GPR, OrderedNode* const Src, int8_t Size, uint8_t Offset) {
@ -4442,15 +4438,12 @@ void OpDispatchBuilder::StoreGPRRegister(uint32_t GPR, OrderedNode* const Src, i
Reg = _Bfi(IR::SizeToOpSize(GPRSize), Size * 8, Offset, Reg, Src);
}
_StoreRegister(Reg, false, offsetof(FEXCore::Core::CPUState, gregs[GPR]), GPRClass, GPRFixedClass, GPRSize);
_StoreRegister(Reg, GPR, GPRClass, GPRSize);
}
void OpDispatchBuilder::StoreXMMRegister(uint32_t XMM, OrderedNode* const Src) {
const auto VectorSize = CTX->HostFeatures.SupportsAVX ? 32 : 16;
const auto VectorOffset =
CTX->HostFeatures.SupportsAVX ? offsetof(Core::CPUState, xmm.avx.data[XMM][0]) : offsetof(Core::CPUState, xmm.sse.data[XMM][0]);
_StoreRegister(Src, false, VectorOffset, FPRClass, FPRFixedClass, VectorSize);
_StoreRegister(Src, XMM, FPRClass, VectorSize);
}
OrderedNode* OpDispatchBuilder::LoadSource(RegisterClassType Class, const X86Tables::DecodedOp& Op,

View File

@ -1402,9 +1402,9 @@ private:
if (IsNZCV(BitOffset)) {
InsertNZCV(BitOffset, Value, ValueOffset, MustMask);
} else if (BitOffset == FEXCore::X86State::RFLAG_PF_RAW_LOC) {
_StoreRegister(Value, false, offsetof(FEXCore::Core::CPUState, pf_raw), GPRClass, GPRFixedClass, CTX->GetGPRSize());
_StoreRegister(Value, Core::CPUState::PF_AS_GREG, GPRClass, CTX->GetGPRSize());
} else if (BitOffset == FEXCore::X86State::RFLAG_AF_RAW_LOC) {
_StoreRegister(Value, false, offsetof(FEXCore::Core::CPUState, af_raw), GPRClass, GPRFixedClass, CTX->GetGPRSize());
_StoreRegister(Value, Core::CPUState::AF_AS_GREG, GPRClass, CTX->GetGPRSize());
} else {
if (ValueOffset || MustMask) {
Value = _Bfe(OpSize::i32Bit, 1, ValueOffset, Value);
@ -1459,9 +1459,9 @@ private:
return _NZCVSelect(OpSize::i32Bit, CondForNZCVBit(BitOffset, Invert), _Constant(1), _Constant(0));
}
} else if (BitOffset == FEXCore::X86State::RFLAG_PF_RAW_LOC) {
return _LoadRegister(false, offsetof(FEXCore::Core::CPUState, pf_raw), GPRClass, GPRFixedClass, CTX->GetGPRSize());
return _LoadRegister(Core::CPUState::PF_AS_GREG, GPRClass, CTX->GetGPRSize());
} else if (BitOffset == FEXCore::X86State::RFLAG_AF_RAW_LOC) {
return _LoadRegister(false, offsetof(FEXCore::Core::CPUState, af_raw), GPRClass, GPRFixedClass, CTX->GetGPRSize());
return _LoadRegister(Core::CPUState::AF_AS_GREG, GPRClass, CTX->GetGPRSize());
} else if (BitOffset == FEXCore::X86State::RFLAG_DF_RAW_LOC) {
// Recover the sign bit, it is the logical DF value
return _Lshr(OpSize::i64Bit, _LoadDF(), _Constant(63));

View File

@ -227,8 +227,7 @@ struct ThunkHandler_impl final : public ThunkHandler {
const uint8_t GPRSize = CTX->GetGPRSize();
if (GPRSize == 8) {
emit->_StoreRegister(emit->_Constant(Entrypoint), false, offsetof(Core::CPUState, gregs[X86State::REG_R11]), IR::GPRClass,
IR::GPRFixedClass, GPRSize);
emit->_StoreRegister(emit->_Constant(Entrypoint), X86State::REG_R11, IR::GPRClass, GPRSize);
} else {
emit->_StoreContext(GPRSize, IR::FPRClass, emit->_VCastFromGPR(8, 8, emit->_Constant(Entrypoint)), offsetof(Core::CPUState, mm[0][0]));
}

View File

@ -343,20 +343,16 @@
}
},
"StaticRA": {
"SSA = LoadRegister i1:$IsAlias, u32:$Offset, RegisterClass:$Class, RegisterClass:$StaticClass, u8:#Size": {
"Desc": ["Loads a value from the static-ra context with offset",
"Dest = Ctx[Offset]"
],
"SSA = LoadRegister u32:$Reg, RegisterClass:$Class, u8:#Size": {
"Desc": ["Loads a value from the given register",
"Size must match the execution mode."],
"DestSize": "Size"
},
"StoreRegister SSA:$Value, i1:$IsPrewrite, u32:$Offset, RegisterClass:$Class, RegisterClass:$StaticClass, u8:#Size": {
"StoreRegister SSA:$Value, u32:$Reg, RegisterClass:$Class, u8:#Size": {
"HasSideEffects": true,
"Desc": ["Stores a value to the static-ra context with offset",
"Ctx[Offset] = Value",
"Zero Extends if value's type is too small",
"Truncates if value's type is too large"
],
"Desc": ["Stores a value to a given register.",
"Size must match the execution mode."],
"DestSize": "Size",
"EmitValidation": [
"WalkFindRegClass($Value) == $Class"

View File

@ -78,7 +78,7 @@ void PassManager::AddDefaultPasses(FEXCore::Context::ContextImpl* ctx, bool Inli
InsertPass(CreateLongDivideEliminationPass());
}
InsertPass(CreateDeadStoreElimination(ctx->HostFeatures.SupportsAVX));
InsertPass(CreateDeadStoreElimination());
InsertPass(CreatePassDeadCodeElimination());
InsertPass(CreateConstProp(InlineConstants, ctx->HostFeatures.SupportsTSOImm9, Is64BitMode()));

View File

@ -20,7 +20,7 @@ fextl::unique_ptr<FEXCore::IR::Pass> CreateConstProp(bool InlineConstants, bool
fextl::unique_ptr<FEXCore::IR::Pass> CreateContextLoadStoreElimination(bool SupportsAVX);
fextl::unique_ptr<FEXCore::IR::Pass> CreateInlineCallOptimization(const FEXCore::CPUIDEmu* CPUID);
fextl::unique_ptr<FEXCore::IR::Pass> CreateDeadFlagCalculationEliminination();
fextl::unique_ptr<FEXCore::IR::Pass> CreateDeadStoreElimination(bool SupportsAVX);
fextl::unique_ptr<FEXCore::IR::Pass> CreateDeadStoreElimination();
fextl::unique_ptr<FEXCore::IR::Pass> CreatePassDeadCodeElimination();
fextl::unique_ptr<FEXCore::IR::Pass> CreateIRCompaction(FEXCore::Utils::IntrusivePooledAllocator& Allocator);
fextl::unique_ptr<FEXCore::IR::RegisterAllocationPass> CreateRegisterAllocationPass(FEXCore::IR::Pass* CompactionPass, bool SupportsAVX);

View File

@ -495,6 +495,18 @@ private:
// Block local Passes
bool RedundantStoreLoadElimination(FEXCore::IR::IREmitter* IREmit);
unsigned OffsetForReg(FEXCore::IR::RegisterClassType Class, unsigned Reg, unsigned Size) {
if (Class == FEXCore::IR::FPRClass) {
return Size == 32 ? offsetof(FEXCore::Core::CPUState, xmm.avx.data[Reg][0]) : offsetof(FEXCore::Core::CPUState, xmm.sse.data[Reg][0]);
} else if (Reg == FEXCore::Core::CPUState::PF_AS_GREG) {
return offsetof(FEXCore::Core::CPUState, pf_raw);
} else if (Reg == FEXCore::Core::CPUState::AF_AS_GREG) {
return offsetof(FEXCore::Core::CPUState, af_raw);
} else {
return offsetof(FEXCore::Core::CPUState, gregs[Reg]);
}
}
};
ContextMemberInfo* RCLSE::FindMemberInfo(ContextInfo* ContextClassificationInfo, uint32_t Offset, uint8_t Size) {
@ -648,10 +660,13 @@ bool RCLSE::RedundantStoreLoadElimination(FEXCore::IR::IREmitter* IREmit) {
Changed |= ClassifyContextStore(IREmit, &LocalInfo, Op->Class, Op->Offset, IROp->Size, CodeNode, CurrentIR.GetNode(Op->Value));
} else if (IROp->Op == OP_STOREREGISTER) {
auto Op = IROp->CW<IR::IROp_StoreRegister>();
Changed |= ClassifyContextStore(IREmit, &LocalInfo, Op->Class, Op->Offset, IROp->Size, CodeNode, CurrentIR.GetNode(Op->Value));
auto Offset = OffsetForReg(Op->Class, Op->Reg, IROp->Size);
Changed |= ClassifyContextStore(IREmit, &LocalInfo, Op->Class, Offset, IROp->Size, CodeNode, CurrentIR.GetNode(Op->Value));
} else if (IROp->Op == OP_LOADREGISTER) {
auto Op = IROp->CW<IR::IROp_LoadRegister>();
Changed |= ClassifyContextLoad(IREmit, &LocalInfo, Op->Class, Op->Offset, IROp->Size, CodeNode, BlockEnd);
auto Offset = OffsetForReg(Op->Class, Op->Reg, IROp->Size);
Changed |= ClassifyContextLoad(IREmit, &LocalInfo, Op->Class, Offset, IROp->Size, CodeNode, BlockEnd);
} else if (IROp->Op == OP_LOADCONTEXT) {
auto Op = IROp->CW<IR::IROp_LoadContext>();
Changed |= ClassifyContextLoad(IREmit, &LocalInfo, Op->Class, Op->Offset, IROp->Size, CodeNode, BlockEnd);
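In effect, OffsetForReg undoes the new register-index encoding so that RCLSE can keep keying its tracking structures on CPUState offsets. A few illustrative mappings, derived from the helper above rather than added by the commit:

// OffsetForReg(GPRClass, 0, 8)                          -> offsetof(CPUState, gregs[0])
// OffsetForReg(GPRClass, Core::CPUState::PF_AS_GREG, 8) -> offsetof(CPUState, pf_raw)
// OffsetForReg(FPRClass, 2, 32)                         -> offsetof(CPUState, xmm.avx.data[2][0])
// OffsetForReg(FPRClass, 2, 16)                         -> offsetof(CPUState, xmm.sse.data[2][0])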

View File

@ -26,69 +26,12 @@ constexpr int PropagationRounds = 5;
class DeadStoreElimination final : public FEXCore::IR::Pass {
public:
explicit DeadStoreElimination(bool SupportsAVX_)
: SupportsAVX {SupportsAVX_} {}
explicit DeadStoreElimination() {}
bool Run(IREmitter* IREmit) override;
private:
bool SupportsAVX;
bool IsFPR(uint32_t Offset) const {
const auto [begin, end] = [this]() -> std::pair<ptrdiff_t, ptrdiff_t> {
if (SupportsAVX) {
return {offsetof(FEXCore::Core::CpuStateFrame, State.xmm.avx.data[0][0]),
offsetof(FEXCore::Core::CpuStateFrame, State.xmm.avx.data[16][0])};
} else {
return {offsetof(FEXCore::Core::CpuStateFrame, State.xmm.sse.data[0][0]),
offsetof(FEXCore::Core::CpuStateFrame, State.xmm.sse.data[16][0])};
}
}();
if (Offset < begin || Offset >= end) {
return false;
}
return true;
}
bool IsTrackedWriteFPR(uint32_t Offset, uint8_t Size) const {
if (Size != 16 && Size != 8 && Size != 4) {
return false;
}
if (Offset & 15) {
return false;
}
return IsFPR(Offset);
}
uint64_t FPRBit(uint32_t Offset, uint32_t Size) const {
if (!IsFPR(Offset)) {
return 0;
}
const auto begin = offsetof(Core::CpuStateFrame, State.xmm.avx.data[0][0]);
const auto regSize = SupportsAVX ? Core::CPUState::XMM_AVX_REG_SIZE : Core::CPUState::XMM_SSE_REG_SIZE;
const auto regn = (Offset - begin) / regSize;
const auto bitn = regn * 3;
if (!IsTrackedWriteFPR(Offset, Size)) {
return 7UL << (bitn);
}
if (Size == 16) {
return 7UL << (bitn);
} else if (Size == 8) {
return 3UL << (bitn);
} else if (Size == 4) {
return 1UL << (bitn);
} else {
LOGMAN_MSG_A_FMT("Unexpected FPR size {}", Size);
}
return 7UL << (bitn); // Return maximum on failure case
uint64_t FPRBit(RegisterClassType Class, uint32_t Reg) const {
return (Class == FPRClass) ? (1UL << Reg) : 0;
}
};
@ -105,42 +48,14 @@ struct GPRInfo {
uint32_t kill {0};
};
bool IsFullGPR(uint32_t Offset, uint8_t Size) {
if (Size != 8) {
return false;
}
if (Offset & 7) {
return false;
}
if (Offset < 8 || Offset >= (17 * 8)) {
return false;
}
return true;
}
bool IsGPR(uint32_t Offset) {
if (Offset < 8 || Offset >= (17 * 8)) {
return false;
}
return true;
}
uint32_t GPRBit(uint32_t Offset) {
if (!IsGPR(Offset)) {
return 0;
}
return 1 << ((Offset - 8) / 8);
uint32_t GPRBit(RegisterClassType Class, uint32_t Reg) {
return (Class == GPRClass) ? (1U << Reg) : 0;
}
struct FPRInfo {
uint64_t reads {0};
uint64_t writes {0};
uint64_t kill {0};
uint32_t reads {0};
uint32_t writes {0};
uint32_t kill {0};
};
struct Info {
@ -173,65 +88,33 @@ bool DeadStoreElimination::Run(IREmitter* IREmit) {
// This is conservative and doesn't try to be smart about loads after writes
{
for (auto [BlockNode, BlockIROp] : CurrentIR.GetBlocks()) {
auto& BlockInfo = InfoMap[BlockNode];
for (auto [CodeNode, IROp] : CurrentIR.GetCode(BlockNode)) {
auto ClassifyRegisterStore = [this](Info& BlockInfo, uint32_t Offset, uint8_t Size) {
//// GPR ////
if (IsFullGPR(Offset, Size)) {
BlockInfo.gpr.writes |= GPRBit(Offset);
} else {
BlockInfo.gpr.reads |= GPRBit(Offset);
}
//// FPR ////
if (IsTrackedWriteFPR(Offset, Size)) {
BlockInfo.fpr.writes |= FPRBit(Offset, Size);
} else {
BlockInfo.fpr.reads |= FPRBit(Offset, Size);
}
};
auto ClassifyRegisterLoad = [this](Info& BlockInfo, uint32_t Offset, uint8_t Size) {
//// GPR ////
BlockInfo.gpr.reads |= GPRBit(Offset);
//// FPR ////
BlockInfo.fpr.reads |= FPRBit(Offset, Size);
};
//// Flags ////
if (IROp->Op == OP_STOREFLAG) {
auto Op = IROp->C<IR::IROp_StoreFlag>();
auto& BlockInfo = InfoMap[BlockNode];
BlockInfo.flag.writes |= 1UL << Op->Flag;
} else if (IROp->Op == OP_INVALIDATEFLAGS) {
auto Op = IROp->C<IR::IROp_InvalidateFlags>();
auto& BlockInfo = InfoMap[BlockNode];
BlockInfo.flag.writes |= Op->Flags;
} else if (IROp->Op == OP_LOADFLAG) {
auto Op = IROp->C<IR::IROp_LoadFlag>();
auto& BlockInfo = InfoMap[BlockNode];
BlockInfo.flag.reads |= 1UL << Op->Flag;
} else if (IROp->Op == OP_LOADDF) {
auto& BlockInfo = InfoMap[BlockNode];
BlockInfo.flag.reads |= 1UL << X86State::RFLAG_DF_RAW_LOC;
} else if (IROp->Op == OP_STOREREGISTER) {
auto Op = IROp->C<IR::IROp_StoreRegister>();
auto& BlockInfo = InfoMap[BlockNode];
ClassifyRegisterStore(BlockInfo, Op->Offset, IROp->Size);
BlockInfo.gpr.writes |= GPRBit(Op->Class, Op->Reg);
BlockInfo.fpr.writes |= FPRBit(Op->Class, Op->Reg);
} else if (IROp->Op == OP_LOADREGISTER) {
auto Op = IROp->C<IR::IROp_LoadRegister>();
auto& BlockInfo = InfoMap[BlockNode];
ClassifyRegisterLoad(BlockInfo, Op->Offset, IROp->Size);
BlockInfo.gpr.reads |= GPRBit(Op->Class, Op->Reg);
BlockInfo.fpr.reads |= FPRBit(Op->Class, Op->Reg);
}
}
}
@ -253,32 +136,15 @@ bool DeadStoreElimination::Run(IREmitter* IREmit) {
auto& BlockInfo = InfoMap[BlockNode];
auto& TargetInfo = InfoMap[TargetNode];
//// Flags ////
// stores to remove are written by the next block but not read
BlockInfo.flag.kill = TargetInfo.flag.writes & ~(TargetInfo.flag.reads) & ~BlockInfo.flag.reads;
BlockInfo.gpr.kill = TargetInfo.gpr.writes & ~(TargetInfo.gpr.reads) & ~BlockInfo.gpr.reads;
BlockInfo.fpr.kill = TargetInfo.fpr.writes & ~(TargetInfo.fpr.reads) & ~BlockInfo.fpr.reads;
// Flags that are written by the next block can be considered as written by this block, if not read
BlockInfo.flag.writes |= BlockInfo.flag.kill & ~BlockInfo.flag.reads;
//// GPRs ////
// stores to remove are written by the next block but not read
BlockInfo.gpr.kill = TargetInfo.gpr.writes & ~(TargetInfo.gpr.reads) & ~BlockInfo.gpr.reads;
// GPRs that are written by the next block can be considered as written by this block, if not read
BlockInfo.gpr.writes |= BlockInfo.gpr.kill & ~BlockInfo.gpr.reads;
//// FPRs ////
// stores to remove are written by the next block but not read
BlockInfo.fpr.kill = TargetInfo.fpr.writes & ~(TargetInfo.fpr.reads) & ~BlockInfo.fpr.reads;
// FPRs that are written by the next block can be considered as written by this block, if not read
BlockInfo.fpr.writes |= BlockInfo.fpr.kill & ~BlockInfo.fpr.reads;
} else if (IROp->Op == OP_CONDJUMP) {
auto Op = IROp->C<IR::IROp_CondJump>();
@ -289,33 +155,18 @@ bool DeadStoreElimination::Run(IREmitter* IREmit) {
auto& TrueTargetInfo = InfoMap[TrueTargetNode];
auto& FalseTargetInfo = InfoMap[FalseTargetNode];
//// Flags ////
// stores to remove are written by the next blocks but not read
BlockInfo.flag.kill = TrueTargetInfo.flag.writes & ~(TrueTargetInfo.flag.reads) & ~BlockInfo.flag.reads;
BlockInfo.gpr.kill = TrueTargetInfo.gpr.writes & ~(TrueTargetInfo.gpr.reads) & ~BlockInfo.gpr.reads;
BlockInfo.fpr.kill = TrueTargetInfo.fpr.writes & ~(TrueTargetInfo.fpr.reads) & ~BlockInfo.fpr.reads;
BlockInfo.flag.kill &= FalseTargetInfo.flag.writes & ~(FalseTargetInfo.flag.reads) & ~BlockInfo.flag.reads;
BlockInfo.gpr.kill &= FalseTargetInfo.gpr.writes & ~(FalseTargetInfo.gpr.reads) & ~BlockInfo.gpr.reads;
BlockInfo.fpr.kill &= FalseTargetInfo.fpr.writes & ~(FalseTargetInfo.fpr.reads) & ~BlockInfo.fpr.reads;
// Flags that are written by the next blocks can be considered as written by this block, if not read
BlockInfo.flag.writes |= BlockInfo.flag.kill & ~BlockInfo.flag.reads;
//// GPRs ////
// stores to remove are written by the next blocks but not read
BlockInfo.gpr.kill = TrueTargetInfo.gpr.writes & ~(TrueTargetInfo.gpr.reads) & ~BlockInfo.gpr.reads;
BlockInfo.gpr.kill &= FalseTargetInfo.gpr.writes & ~(FalseTargetInfo.gpr.reads) & ~BlockInfo.gpr.reads;
// GPRs that are written by the next blocks can be considered as written by this block, if not read
BlockInfo.gpr.writes |= BlockInfo.gpr.kill & ~BlockInfo.gpr.reads;
//// FPRs ////
// stores to remove are written by the next blocks but not read
BlockInfo.fpr.kill = TrueTargetInfo.fpr.writes & ~(TrueTargetInfo.fpr.reads) & ~BlockInfo.fpr.reads;
BlockInfo.fpr.kill &= FalseTargetInfo.fpr.writes & ~(FalseTargetInfo.fpr.reads) & ~BlockInfo.fpr.reads;
// FPRs that are written by the next blocks can be considered as written by this block, if not read
BlockInfo.fpr.writes |= BlockInfo.fpr.kill & ~BlockInfo.fpr.reads;
}
}
@ -325,34 +176,12 @@ bool DeadStoreElimination::Run(IREmitter* IREmit) {
// Remove the dead stores
{
for (auto [BlockNode, BlockIROp] : CurrentIR.GetBlocks()) {
auto& BlockInfo = InfoMap[BlockNode];
for (auto [CodeNode, IROp] : CurrentIR.GetCode(BlockNode)) {
auto RemoveDeadRegisterStore = [this](FEXCore::IR::IREmitter* IREmit, FEXCore::IR::OrderedNode* CodeNode, Info& BlockInfo,
uint32_t Offset, uint8_t Size) -> bool {
bool Changed {};
//// GPRs ////
// If this OP_STOREREGISTER is never read, remove it
if (BlockInfo.gpr.kill & GPRBit(Offset)) {
IREmit->Remove(CodeNode);
Changed = true;
}
//// FPRs ////
// If this OP_STOREREGISTER is never read, remove it
if ((BlockInfo.fpr.kill & FPRBit(Offset, Size)) == FPRBit(Offset, Size) && (FPRBit(Offset, Size) != 0)) {
IREmit->Remove(CodeNode);
Changed = true;
}
return Changed;
};
//// Flags ////
if (IROp->Op == OP_STOREFLAG) {
auto Op = IROp->C<IR::IROp_StoreFlag>();
auto& BlockInfo = InfoMap[BlockNode];
// If this StoreFlag is never read, remove it
if (BlockInfo.flag.kill & (1UL << Op->Flag)) {
IREmit->Remove(CodeNode);
@ -361,9 +190,11 @@ bool DeadStoreElimination::Run(IREmitter* IREmit) {
} else if (IROp->Op == OP_STOREREGISTER) {
auto Op = IROp->C<IR::IROp_StoreRegister>();
auto& BlockInfo = InfoMap[BlockNode];
Changed |= RemoveDeadRegisterStore(IREmit, CodeNode, BlockInfo, Op->Offset, IROp->Size);
// If this OP_STOREREGISTER is never read, remove it
if ((BlockInfo.gpr.kill & GPRBit(Op->Class, Op->Reg)) || (BlockInfo.fpr.kill & FPRBit(Op->Class, Op->Reg))) {
IREmit->Remove(CodeNode);
Changed = true;
}
}
}
}
@ -372,8 +203,8 @@ bool DeadStoreElimination::Run(IREmitter* IREmit) {
return Changed;
}
fextl::unique_ptr<FEXCore::IR::Pass> CreateDeadStoreElimination(bool SupportsAVX) {
return fextl::make_unique<DeadStoreElimination>(SupportsAVX);
fextl::unique_ptr<FEXCore::IR::Pass> CreateDeadStoreElimination() {
return fextl::make_unique<DeadStoreElimination>();
}
} // namespace FEXCore::IR
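The backward dataflow the simplified pass performs per block, restated as a minimal sketch with the reasoning spelled out (field names follow the Info/GPRInfo/FPRInfo structs above):

// A register store is dead in this block if the successor block overwrites the
// register before reading it, and this block itself does not read it either.
BlockInfo.gpr.kill = TargetInfo.gpr.writes & ~TargetInfo.gpr.reads & ~BlockInfo.gpr.reads;
BlockInfo.fpr.kill = TargetInfo.fpr.writes & ~TargetInfo.fpr.reads & ~BlockInfo.fpr.reads;
// Anything killed here behaves as if this block wrote it, which lets the
// information propagate backwards over the PropagationRounds iterations.
BlockInfo.gpr.writes |= BlockInfo.gpr.kill & ~BlockInfo.gpr.reads;
BlockInfo.fpr.writes |= BlockInfo.fpr.kill & ~BlockInfo.fpr.reads;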

View File

@ -57,12 +57,12 @@ public:
private:
FlagInfo Classify(IROp_Header* Node);
unsigned FlagForOffset(unsigned Offset);
unsigned FlagForReg(unsigned Reg);
unsigned FlagsForCondClassType(CondClassType Cond);
};
unsigned DeadFlagCalculationEliminination::FlagForOffset(unsigned Offset) {
return Offset == offsetof(FEXCore::Core::CPUState, pf_raw) ? FLAG_P : Offset == offsetof(FEXCore::Core::CPUState, af_raw) ? FLAG_A : 0;
unsigned DeadFlagCalculationEliminination::FlagForReg(unsigned Reg) {
return Reg == Core::CPUState::PF_AS_GREG ? FLAG_P : Reg == Core::CPUState::AF_AS_GREG ? FLAG_A : 0;
};
unsigned DeadFlagCalculationEliminination::FlagsForCondClassType(CondClassType Cond) {
@ -283,21 +283,20 @@ FlagInfo DeadFlagCalculationEliminination::Classify(IROp_Header* IROp) {
case OP_LOADREGISTER: {
auto Op = IROp->CW<IR::IROp_LoadRegister>();
if (Op->Class != GPRClass || Op->StaticClass != GPRFixedClass) {
if (Op->Class != GPRClass) {
break;
}
return {.Read = FlagForOffset(Op->Offset)};
return {.Read = FlagForReg(Op->Reg)};
}
case OP_STOREREGISTER: {
auto Op = IROp->CW<IR::IROp_StoreRegister>();
if (Op->Class != GPRClass || Op->StaticClass != GPRFixedClass) {
if (Op->Class != GPRClass) {
break;
}
LOGMAN_THROW_A_FMT(!Op->IsPrewrite, "PF/AF writes are fixed-form");
unsigned Flag = FlagForOffset(Op->Offset);
unsigned Flag = FlagForReg(Op->Reg);
return {
.Write = Flag,

View File

@ -465,110 +465,20 @@ void ConstrainedRAPass::OptimizeStaticRegisters(FEXCore::IR::IRListView* IR) {
// Helpers
// Is an OP_STOREREGISTER eligible to write directly to the SRA reg?
auto IsPreWritable = [this](uint8_t Size, RegisterClassType StaticClass) {
LOGMAN_THROW_A_FMT(StaticClass == GPRFixedClass || StaticClass == FPRFixedClass, "Unexpected static class {}", StaticClass);
if (StaticClass == GPRFixedClass) {
auto IsPreWritable = [this](uint8_t Size, RegisterClassType Class) {
LOGMAN_THROW_A_FMT(Class == GPRClass || Class == FPRClass, "Unexpected class {}", Class);
if (Class == GPRClass) {
return Size == 8 || Size == 4;
} else if (StaticClass == FPRFixedClass) {
} else if (Class == FPRClass) {
return Size == 16 || (Size == 32 && SupportsAVX);
}
return false; // Unknown
};
// Is an OP_LOADREGISTER eligible to read directly from the SRA reg?
auto IsAliasable = [this](uint8_t Size, RegisterClassType StaticClass, uint32_t Offset) {
LOGMAN_THROW_A_FMT(StaticClass == GPRFixedClass || StaticClass == FPRFixedClass, "Unexpected static class {}", StaticClass);
if (StaticClass == GPRFixedClass) {
// We need more meta info to support not-size-of-reg
return (Size == 8 || Size == 4) && ((Offset & 7) == 0);
} else if (StaticClass == FPRFixedClass) {
// We need more meta info to support not-size-of-reg
if (Size == 32 && SupportsAVX && (Offset & 31) == 0) {
return true;
}
return (Size == 16 /*|| Size == 8 || Size == 4*/) && ((Offset & 15) == 0);
}
return false; // Unknown
};
const auto GetFPRBeginAndEnd = [this]() -> std::pair<ptrdiff_t, ptrdiff_t> {
if (SupportsAVX) {
return {
offsetof(FEXCore::Core::CpuStateFrame, State.xmm.avx.data[0][0]),
offsetof(FEXCore::Core::CpuStateFrame, State.xmm.avx.data[16][0]),
};
} else {
return {
offsetof(FEXCore::Core::CpuStateFrame, State.xmm.sse.data[0][0]),
offsetof(FEXCore::Core::CpuStateFrame, State.xmm.sse.data[16][0]),
};
}
};
// Get SRA Reg and Class from a Context offset
const auto GetRegAndClassFromOffset = [&, this](uint32_t Offset) {
const auto beginGpr = offsetof(FEXCore::Core::CpuStateFrame, State.gregs[0]);
const auto endGpr = offsetof(FEXCore::Core::CpuStateFrame, State.gregs[16]);
const auto pf = offsetof(FEXCore::Core::CpuStateFrame, State.pf_raw);
const auto af = offsetof(FEXCore::Core::CpuStateFrame, State.af_raw);
const auto [beginFpr, endFpr] = GetFPRBeginAndEnd();
LOGMAN_THROW_AA_FMT((Offset >= beginGpr && Offset < endGpr) || (Offset >= beginFpr && Offset < endFpr) || (Offset == pf) || (Offset == af),
"Unexpected Offset {}", Offset);
unsigned FlagOffset = Graph->Set.Classes[GPRFixedClass.Val].PhysicalCount - 2;
if (Offset == pf) {
return PhysicalRegister(GPRFixedClass, FlagOffset);
} else if (Offset == af) {
return PhysicalRegister(GPRFixedClass, FlagOffset + 1);
} else if (Offset >= beginGpr && Offset < endGpr) {
auto reg = (Offset - beginGpr) / Core::CPUState::GPR_REG_SIZE;
return PhysicalRegister(GPRFixedClass, reg);
} else if (Offset >= beginFpr && Offset < endFpr) {
const auto size = SupportsAVX ? Core::CPUState::XMM_AVX_REG_SIZE : Core::CPUState::XMM_SSE_REG_SIZE;
const auto reg = (Offset - beginFpr) / size;
return PhysicalRegister(FPRFixedClass, reg);
}
return PhysicalRegister::Invalid();
};
auto GprSize = Graph->Set.Classes[GPRFixedClass.Val].PhysicalCount;
auto MapsSize = Graph->Set.Classes[GPRFixedClass.Val].PhysicalCount + Graph->Set.Classes[FPRFixedClass.Val].PhysicalCount;
StaticMaps.resize(MapsSize);
// Get a StaticMap entry from context offset
const auto GetStaticMapFromOffset = [&](uint32_t Offset) -> LiveRange** {
const auto beginGpr = offsetof(FEXCore::Core::CpuStateFrame, State.gregs[0]);
const auto endGpr = offsetof(FEXCore::Core::CpuStateFrame, State.gregs[16]);
const auto pf = offsetof(FEXCore::Core::CpuStateFrame, State.pf_raw);
const auto af = offsetof(FEXCore::Core::CpuStateFrame, State.af_raw);
const auto [beginFpr, endFpr] = GetFPRBeginAndEnd();
LOGMAN_THROW_AA_FMT((Offset >= beginGpr && Offset < endGpr) || (Offset >= beginFpr && Offset < endFpr) || (Offset == pf) || (Offset == af),
"Unexpected Offset {}", Offset);
unsigned FlagOffset = Graph->Set.Classes[GPRFixedClass.Val].PhysicalCount - 2;
if (Offset == pf) {
return &StaticMaps[FlagOffset];
} else if (Offset == af) {
return &StaticMaps[FlagOffset + 1];
} else if (Offset >= beginGpr && Offset < endGpr) {
auto reg = (Offset - beginGpr) / Core::CPUState::GPR_REG_SIZE;
return &StaticMaps[reg];
} else if (Offset >= beginFpr && Offset < endFpr) {
const auto size = SupportsAVX ? Core::CPUState::XMM_AVX_REG_SIZE : Core::CPUState::XMM_SSE_REG_SIZE;
const auto reg = (Offset - beginFpr) / size;
return &StaticMaps[GprSize + reg];
}
return nullptr;
};
// Get a StaticMap entry from reg and class
const auto GetStaticMapFromReg = [&](IR::PhysicalRegister PhyReg) -> LiveRange** {
LOGMAN_THROW_A_FMT(PhyReg.Class == GPRFixedClass.Val || PhyReg.Class == FPRFixedClass.Val, "Unexpected Class {}", PhyReg.Class);
@ -582,6 +492,20 @@ void ConstrainedRAPass::OptimizeStaticRegisters(FEXCore::IR::IRListView* IR) {
return nullptr;
};
const auto GetRegForSRA = [&](auto Class, auto Reg) {
unsigned FlagOffset = Graph->Set.Classes[GPRFixedClass.Val].PhysicalCount - 2;
if (Class == FPRClass) {
return PhysicalRegister(FPRFixedClass, Reg);
} else if (Reg == Core::CPUState::PF_AS_GREG) {
return PhysicalRegister(GPRFixedClass, FlagOffset);
} else if (Reg == Core::CPUState::AF_AS_GREG) {
return PhysicalRegister(GPRFixedClass, FlagOffset + 1);
} else {
return PhysicalRegister(GPRFixedClass, Reg);
}
};
// First pass: Mark pre-writes
for (auto [BlockNode, BlockHeader] : IR->GetBlocks()) {
for (auto [CodeNode, IROp] : IR->GetCode(BlockNode)) {
@ -592,13 +516,12 @@ void ConstrainedRAPass::OptimizeStaticRegisters(FEXCore::IR::IRListView* IR) {
const auto OpID = Op->Value.ID();
auto& OpLiveRange = LiveRanges[OpID.Value];
if (IsPreWritable(IROp->Size, Op->StaticClass) && OpLiveRange.PrefferedRegister.IsInvalid() && !OpLiveRange.Global) {
if (IsPreWritable(IROp->Size, Op->Class) && OpLiveRange.PrefferedRegister.IsInvalid() && !OpLiveRange.Global) {
// Pre-write and sra-allocate in the defining node - this might be undone if a read before the actual store happens
SRA_DEBUG("Prewritting ssa{} (Store in ssa{})\n", OpID, Node);
OpLiveRange.PrefferedRegister = GetRegAndClassFromOffset(Op->Offset);
OpLiveRange.PrefferedRegister = GetRegForSRA(Op->Class, Op->Reg);
OpLiveRange.PreWritten = Node;
SetNodeClass(Graph, OpID, Op->StaticClass);
SetNodeClass(Graph, OpID, RegisterClassType {OpLiveRange.PrefferedRegister.Class});
}
}
}
@ -664,7 +587,8 @@ void ConstrainedRAPass::OptimizeStaticRegisters(FEXCore::IR::IRListView* IR) {
if (IROp->Op == OP_LOADREGISTER) {
auto Op = IROp->C<IR::IROp_LoadRegister>();
auto StaticMap = GetStaticMapFromOffset(Op->Offset);
auto Reg = GetRegForSRA(Op->Class, Op->Reg);
auto StaticMap = GetStaticMapFromReg(Reg);
// Make sure there wasn't a store pre-written before this read
if ((*StaticMap) && (*StaticMap)->PreWritten.IsValid()) {
@ -676,25 +600,22 @@ void ConstrainedRAPass::OptimizeStaticRegisters(FEXCore::IR::IRListView* IR) {
SetNodeClass(Graph, ID, Op->Class);
}
// if not sra-allocated and full size, sra-allocate
// if not sra-allocated, sra-allocate
if (!NodeLiveRange.Global && NodeLiveRange.PrefferedRegister.IsInvalid()) {
// only full size reads can be aliased
if (IsAliasable(IROp->Size, Op->StaticClass, Op->Offset)) {
// We can only track a single active span.
// Marking here as written is overly aggressive, but
// there might be write(s) later on the instruction stream
if ((*StaticMap)) {
SRA_DEBUG("Marking ssa{} as written because ssa{} re-loads sra{}, "
"and we can't track possible future writes\n",
(*StaticMap) - &LiveRanges[0], Node, -1 /*vreg*/);
(*StaticMap)->Written = true;
}
NodeLiveRange.PrefferedRegister = GetRegAndClassFromOffset(Op->Offset); // 0, 1, and so on
(*StaticMap) = &NodeLiveRange;
SetNodeClass(Graph, Node, Op->StaticClass);
SRA_DEBUG("Marking ssa{} as allocated to sra{}\n", Node, -1 /*vreg*/);
// We can only track a single active span.
// Marking here as written is overly aggressive, but
// there might be write(s) later on the instruction stream
if ((*StaticMap)) {
SRA_DEBUG("Marking ssa{} as written because ssa{} re-loads sra{}, "
"and we can't track possible future writes\n",
(*StaticMap) - &LiveRanges[0], Node, -1 /*vreg*/);
(*StaticMap)->Written = true;
}
NodeLiveRange.PrefferedRegister = Reg;
(*StaticMap) = &NodeLiveRange;
SetNodeClass(Graph, Node, RegisterClassType {NodeLiveRange.PrefferedRegister.Class});
SRA_DEBUG("Marking ssa{} as allocated to sra{}\n", Node, -1 /*vreg*/);
}
}
}
@ -707,7 +628,8 @@ void ConstrainedRAPass::OptimizeStaticRegisters(FEXCore::IR::IRListView* IR) {
const auto OpID = Op->Value.ID();
auto& OpLiveRange = LiveRanges[OpID.Value];
auto StaticMap = GetStaticMapFromOffset(Op->Offset);
auto Reg = GetRegForSRA(Op->Class, Op->Reg);
auto StaticMap = GetStaticMapFromReg(Reg);
// if a read is pending, it has been written
if ((*StaticMap)) {
// writes to self don't invalidate the span

View File

@ -120,6 +120,11 @@ struct CPUState {
// Since this memory region is thread local, we use NonAtomicRefCounter for fast atomic access.
NonAtomicRefCounter<uint64_t>* DeferredSignalFaultAddress;
// PF/AF are statically mapped as-if they were r16/r17 (which do not exist in
// x86 otherwise). This allows a straightforward mapping for SRA.
static constexpr uint8_t PF_AS_GREG = 16;
static constexpr uint8_t AF_AS_GREG = 17;
static constexpr size_t FLAG_SIZE = sizeof(flags[0]);
static constexpr size_t GDT_SIZE = sizeof(gdt[0]);
static constexpr size_t GPR_REG_SIZE = sizeof(gregs[0]);
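A hypothetical helper distilling how the Arm64 backend resolves these synthetic indices; the real code inlines the same ternary in the LoadRegister/StoreRegister hunks earlier in this diff, and StaticRegisters is the backend's SRA register bank:

unsigned MapGuestRegToStatic(unsigned Reg, size_t NumStaticRegs) {
  // Guest GPRs 0-15 map 1:1 onto the static registers; PF and AF take the
  // last two slots, matching their placement as pseudo-registers 16 and 17.
  if (Reg == Core::CPUState::PF_AS_GREG) return NumStaticRegs - 2;
  if (Reg == Core::CPUState::AF_AS_GREG) return NumStaticRegs - 1;
  return Reg;
}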

View File

@ -74,7 +74,7 @@
"add w10, w10, #0x40 (64)",
"add w11, w11, #0x40 (64)",
"sub w5, w5, #0x40 (64)",
"mov w27, w5",
"mov x27, x5",
"subs w26, w5, #0x40 (64)",
"cfinv"
]
@ -105,21 +105,21 @@
"ldr w4, [x4, #8]",
"mov w20, #0xffffffcc",
"str w10, [x9, w20, sxtw]",
"mov w5, w4",
"mov w7, w6",
"mov x5, x4",
"mov x7, x6",
"mov w20, #0xffffffdc",
"ldr w10, [x9, w20, sxtw]",
"mov w20, #0xffffffff",
"adds w21, w4, w20",
"mov w5, w21",
"mov x5, x21",
"mvn w27, w6",
"adcs w26, w6, w20",
"mov w7, w26",
"mov x7, x26",
"mov w20, #0xffffffd8",
"str w21, [x9, w20, sxtw]",
"mov w20, #0xffffffd4",
"str w26, [x9, w20, sxtw]",
"mov w7, w21",
"mov x7, x21",
"mov w22, #0xffffffd0",
"str w21, [x9, w22, sxtw]",
"ldr w5, [x9, w20, sxtw]",
@ -258,12 +258,12 @@
"ExpectedArm64ASM": [
"mov w20, w8",
"str w9, [x20, #-4]!",
"mov w8, w20",
"mov w9, w20",
"mov w27, w20",
"mov x8, x20",
"mov x9, x20",
"mov x27, x20",
"subs w26, w20, #0x44 (68)",
"cfinv",
"mov w8, w26",
"mov x8, x26",
"mov w21, #0xffffffbc",
"str w5, [x20, w21, sxtw]",
"ldr w4, [x20, w21, sxtw]",

View File

@ -199,7 +199,7 @@
"cset w20, hs",
"adds w26, w27, #0x1 (1)",
"rmif x20, #63, #nzCv",
"mov w4, w26"
"mov x4, x26"
]
},
"dec ax": {
@ -236,7 +236,7 @@
"cset w20, hs",
"subs w26, w27, #0x1 (1)",
"rmif x20, #63, #nzCv",
"mov w4, w26"
"mov x4, x26"
]
},
"pusha": {

View File

@ -209,7 +209,7 @@
"mov w0, w21",
"bfi w0, w20, #29, #1",
"mov w20, w0",
"mov w4, w26",
"mov x4, x26",
"msr nzcv, x20"
]
},
@ -254,7 +254,7 @@
"mov w0, w21",
"bfi w0, w20, #29, #1",
"mov w20, w0",
"mov w4, w26",
"mov x4, x26",
"msr nzcv, x20"
]
},