Merge pull request #3013 from Sonicadvance1/32bit_sra

IR/Passes/RA: Enable SRA for 32-bit GPRs
This commit is contained in:
Ryan Houdek 2023-08-27 21:30:39 -07:00 committed by GitHub
commit 6f2b3e76ac
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 49 additions and 17 deletions

View File

@ -1128,19 +1128,24 @@ DEF_OP(Bfe) {
auto Op = IROp->C<IR::IROp_Bfe>();
LOGMAN_THROW_AA_FMT(IROp->Size <= 8, "OpSize is too large for BFE: {}", IROp->Size);
LOGMAN_THROW_AA_FMT(Op->Width != 0, "Invalid BFE width of 0");
const uint8_t OpSize = IROp->Size;
const auto EmitSize = OpSize == 8 ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit;
const auto Dst = GetReg(Node);
const auto Src = GetReg(Op->Src.ID());
ubfx(ARMEmitter::Size::i64Bit, Dst, Src, Op->lsb, Op->Width);
ubfx(EmitSize, Dst, Src, Op->lsb, Op->Width);
}
DEF_OP(Sbfe) {
auto Op = IROp->C<IR::IROp_Bfe>();
auto Op = IROp->C<IR::IROp_Sbfe>();
const uint8_t OpSize = IROp->Size;
const auto EmitSize = OpSize == 8 ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit;
const auto Dst = GetReg(Node);
const auto Src = GetReg(Op->Src.ID());
sbfx(ARMEmitter::Size::i64Bit, Dst, Src, Op->lsb, Op->Width);
sbfx(EmitSize, Dst, Src, Op->lsb, Op->Width);
}
ARMEmitter::Condition MapSelectCC(IR::CondClassType Cond) {

View File

@ -133,7 +133,7 @@ DEF_OP(LoadRegister) {
[[maybe_unused]] const auto regId = (Op->Offset / Core::CPUState::GPR_REG_SIZE) - 1;
const auto regOffs = Op->Offset & 7;
LOGMAN_THROW_A_FMT(regId < StaticFPRegisters.size(), "out of range regId");
LOGMAN_THROW_A_FMT(regId < StaticRegisters.size(), "out of range regId");
switch (OpSize) {
case 1:
@ -297,7 +297,7 @@ DEF_OP(LoadRegisterSRA) {
const auto regId = (Op->Offset - offsetof(Core::CpuStateFrame, State.gregs[0])) / Core::CPUState::GPR_REG_SIZE;
const auto regOffs = Op->Offset & 7;
LOGMAN_THROW_A_FMT(regId < StaticFPRegisters.size(), "out of range regId");
LOGMAN_THROW_A_FMT(regId < StaticRegisters.size(), "out of range regId");
const auto reg = StaticRegisters[regId];

View File

@ -447,7 +447,7 @@ void OpDispatchBuilder::ADCOp(OpcodeArgs) {
OrderedNode *Src = LoadSource(GPRClass, Op, Op->Src[SrcIndex], Op->Flags, -1);
uint8_t Size = GetDstSize(Op);
auto CF = GetRFLAG(FEXCore::X86State::RFLAG_CF_LOC);
auto CF = GetRFLAG(FEXCore::X86State::RFLAG_CF_LOC, Size);
auto ALUOp = _Add(Src, CF);
OrderedNode *Result{};
@ -478,7 +478,7 @@ void OpDispatchBuilder::SBBOp(OpcodeArgs) {
OrderedNode *Src = LoadSource(GPRClass, Op, Op->Src[SrcIndex], Op->Flags, -1);
auto Size = GetDstSize(Op);
auto CF = GetRFLAG(FEXCore::X86State::RFLAG_CF_LOC);
auto CF = GetRFLAG(FEXCore::X86State::RFLAG_CF_LOC, Size);
auto ALUOp = _Add(Src, CF);
OrderedNode *Result{};
@ -2474,6 +2474,8 @@ void OpDispatchBuilder::PEXT(OpcodeArgs) {
}
void OpDispatchBuilder::ADXOp(OpcodeArgs) {
const auto OperandSize = GetSrcSize(Op);
// Calculate flags early.
CalculateDeferredFlags();
@ -2483,9 +2485,9 @@ void OpDispatchBuilder::ADXOp(OpcodeArgs) {
auto* Flag = [&]() -> OrderedNode* {
if (IsADCX) {
return GetRFLAG(X86State::RFLAG_CF_LOC);
return GetRFLAG(X86State::RFLAG_CF_LOC, OperandSize);
} else {
return GetRFLAG(X86State::RFLAG_OF_LOC);
return GetRFLAG(X86State::RFLAG_OF_LOC, OperandSize);
}
}();
@ -2516,7 +2518,7 @@ void OpDispatchBuilder::RCROp1Bit(OpcodeArgs) {
OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, -1);
const auto Size = GetSrcBitSize(Op);
auto CF = GetRFLAG(FEXCore::X86State::RFLAG_CF_LOC);
auto CF = GetRFLAG(FEXCore::X86State::RFLAG_CF_LOC, GetSrcSize(Op));
uint32_t Shift = 1;
@ -3281,7 +3283,6 @@ void OpDispatchBuilder::IMULOp(OpcodeArgs) {
void OpDispatchBuilder::MULOp(OpcodeArgs) {
const uint8_t Size = GetSrcSize(Op);
const uint8_t GPRSize = CTX->GetGPRSize();
OrderedNode *Src1 = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, -1);
OrderedNode* Src2 = LoadGPRRegister(X86State::REG_RAX);
@ -3308,8 +3309,8 @@ void OpDispatchBuilder::MULOp(OpcodeArgs) {
else if (Size == 4) {
// Low 32 bits of the result are stored in EAX
// High 32 bits of the result are stored in EDX
OrderedNode *ResultLow = _Bfe(GPRSize, 32, 0, Result);
ResultHigh = _Bfe(GPRSize, 32, 32, Result);
OrderedNode *ResultLow = _Bfe(8, 32, 0, Result);
ResultHigh = _Bfe(8, 32, 32, Result);
StoreGPRRegister(X86State::REG_RAX, ResultLow);
StoreGPRRegister(X86State::REG_RDX, ResultHigh);
}

View File

@ -1192,10 +1192,10 @@ private:
void ZeroMultipleFlags(uint32_t BitMask);
OrderedNode *GetRFLAG(unsigned BitOffset) {
OrderedNode *GetRFLAG(unsigned BitOffset, int32_t Size = 8) {
if (IsNZCV(BitOffset)) {
if (!CachedNZCV || (PossiblySetNZCVBits & (1u << IndexNZCV(BitOffset))))
return _Bfe(1, 1, IndexNZCV(BitOffset), GetNZCV());
return _Bfe(Size, 1, IndexNZCV(BitOffset), GetNZCV());
else
return _Constant(0);
} else {

View File

@ -548,7 +548,7 @@ namespace {
auto IsPreWritable = [](uint8_t Size, RegisterClassType StaticClass) {
LOGMAN_THROW_A_FMT(StaticClass == GPRFixedClass || StaticClass == FPRFixedClass, "Unexpected static class {}", StaticClass);
if (StaticClass == GPRFixedClass) {
return Size == 8;
return Size == 8 || Size == 4;
} else if (StaticClass == FPRFixedClass) {
return Size == 16;
}
@ -560,7 +560,7 @@ namespace {
LOGMAN_THROW_A_FMT(StaticClass == GPRFixedClass || StaticClass == FPRFixedClass, "Unexpected static class {}", StaticClass);
if (StaticClass == GPRFixedClass) {
// Only whole-register accesses (offset 0 within the 8-byte slot) are handled; other offsets or sizes need more meta info
return (Size == 8 /*|| Size == 4*/) && ((Offset & 7) == 0);
return (Size == 8 || Size == 4) && ((Offset & 7) == 0);
} else if (StaticClass == FPRFixedClass) {
// We need more meta info to support not-size-of-reg
return (Size == 16 /*|| Size == 8 || Size == 4*/) && ((Offset & 15) == 0);

View File

@ -0,0 +1,26 @@
%ifdef CONFIG
{
"RegData": {
"RAX": "0x00000000fffffffe",
"RBX": "0x0000000000000001"
},
"Mode": "32BIT"
}
%endif
; Regression test: FEX had a bug where ADC or SBB (add/subtract with carry)
; generated results with garbage in the upper 32 bits.
; The CONFIG block above gives the expected final RAX and RBX as full 64-bit
; values with the upper halves zero.
; NOTE(review): presumably the test harness compares registers against RegData
; once hlt is reached - confirm against the runner.
mov eax, -1 ; eax = 0xFFFFFFFF
mov ebx, -1 ; ebx = 0xFFFFFFFF
mov edx, -1 ; edx = 0xFFFFFFFF, second operand for every step below
; ADC chain: each step computes eax + edx + CF.
clc ; CF = 0
adc eax, edx ; 0xFFFFFFFF + 0xFFFFFFFF + 0 = 0xFFFFFFFE, CF = 1
adc eax, edx ; 0xFFFFFFFE + 0xFFFFFFFF + 1 = 0xFFFFFFFE, CF = 1
adc eax, edx ; same inputs again: eax = 0xFFFFFFFE, CF = 1
; SBB chain: each step computes ebx - edx - CF.
clc ; CF = 0
sbb ebx, edx ; 0xFFFFFFFF - 0xFFFFFFFF - 0 = 0, CF = 0
sbb ebx, edx ; 0 - 0xFFFFFFFF - 0 = 1, CF = 1 (borrow)
sbb ebx, edx ; 1 - 0xFFFFFFFF - 1 = 1, CF = 1 (borrow)
hlt