Merge pull request #2832 from alyssarosenzweig/flags/pack-nzcv

Pack NZCV flags
This commit is contained in:
Ryan Houdek 2023-08-02 13:42:56 -07:00 committed by GitHub
commit 91bd3aa62a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 452 additions and 266 deletions

View File

@ -381,6 +381,8 @@ namespace FEXCore::Context {
// Accessor: returns the current value of the ExitOnHLT member.
bool ExitOnHLTEnabled() const { return ExitOnHLT; }
FEXCore::CPU::CPUBackendFeatures BackendFeatures;
protected:
void ClearCodeCache(FEXCore::Core::InternalThreadState *Thread);
@ -436,7 +438,6 @@ namespace FEXCore::Context {
std::shared_mutex CustomIRMutex;
fextl::unordered_map<uint64_t, std::tuple<CustomIREntrypointHandler, void *, void *>> CustomIRHandlers;
FEXCore::CPU::CPUBackendFeatures BackendFeatures;
FEXCore::CPU::DispatcherConfig DispatcherConfig;
};

View File

@ -190,19 +190,31 @@ DEF_OP(LoadFlag) {
uintptr_t ContextPtr = reinterpret_cast<uintptr_t>(Data->State->CurrentFrame);
ContextPtr += offsetof(FEXCore::Core::CPUState, flags[0]);
ContextPtr += Op->Flag;
uint8_t const *MemData = reinterpret_cast<uint8_t const*>(ContextPtr);
GD = *MemData;
if (Op->Flag == 24 /* NZCV */) {
uint32_t const *MemData = reinterpret_cast<uint32_t const*>(ContextPtr);
GD = *MemData;
} else {
uint8_t const *MemData = reinterpret_cast<uint8_t const*>(ContextPtr);
GD = *MemData;
}
}
DEF_OP(StoreFlag) {
auto Op = IROp->C<IR::IROp_StoreFlag>();
uint8_t Arg = *GetSrc<uint8_t*>(Data->SSAData, Op->Value);
uint32_t Arg = *GetSrc<uint32_t*>(Data->SSAData, Op->Value);
uintptr_t ContextPtr = reinterpret_cast<uintptr_t>(Data->State->CurrentFrame);
ContextPtr += offsetof(FEXCore::Core::CPUState, flags[0]);
ContextPtr += Op->Flag;
uint8_t *MemData = reinterpret_cast<uint8_t*>(ContextPtr);
*MemData = Arg;
if (Op->Flag == 24 /* NZCV */) {
uint32_t *MemData = reinterpret_cast<uint32_t*>(ContextPtr);
*MemData = Arg;
} else {
uint8_t *MemData = reinterpret_cast<uint8_t*>(ContextPtr);
*MemData = Arg;
}
}
DEF_OP(LoadMem) {

View File

@ -84,6 +84,21 @@ DEF_OP(Add) {
}
}
// Emits code that computes the flags for Src1 and copies the resulting NZCV
// system register into the destination GPR. Only 32-bit and 64-bit operation
// sizes are accepted.
DEF_OP(TestNZ) {
  auto Op = IROp->C<IR::IROp_TestNZ>();
  // Read the operation size from the IR header, the same way the neighbouring
  // ALU handlers (Sub, Orlshl, Orlshr, And) do.
  const uint8_t OpSize = IROp->Size;
  LOGMAN_THROW_AA_FMT(OpSize == 4 || OpSize == 8, "Unsupported {} size: {}", __func__, OpSize);
  const auto EmitSize = OpSize == 8 ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit;

  const auto Dst = GetReg(Node);
  const auto ZeroReg = ARMEmitter::Reg::zr;

  // cmn is a flag-setting add: comparing against the zero register sets the
  // flags from Src1 + 0.
  cmn(EmitSize, GetReg(Op->Src1.ID()), ZeroReg);

  // TODO: Optimize this out
  // Materialize the flags register into the destination GPR.
  mrs(Dst, ARMEmitter::SystemRegister::NZCV);
}
DEF_OP(Sub) {
auto Op = IROp->C<IR::IROp_Sub>();
const uint8_t OpSize = IROp->Size;
@ -331,6 +346,40 @@ DEF_OP(Or) {
}
}
// Dst = Src1 | (Src2 << BitShift).
// When Src2 is an inline constant the shift is folded into the immediate;
// otherwise the shifted-register form of orr is emitted.
DEF_OP(Orlshl) {
  auto Op = IROp->C<IR::IROp_Orlshl>();
  const uint8_t OpSize = IROp->Size;
  const auto EmitSize = OpSize == 8 ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit;

  const auto Dst = GetReg(Node);
  const auto Src1 = GetReg(Op->Src1.ID());

  uint64_t Immediate;
  const bool HasImmediate = IsInlineConstant(Op->Src2, &Immediate);
  if (!HasImmediate) {
    // Register operand: let the instruction perform the left shift.
    const auto Src2 = GetReg(Op->Src2.ID());
    orr(EmitSize, Dst, Src1, Src2, ARMEmitter::ShiftType::LSL, Op->BitShift);
  } else {
    // Constant operand: pre-shift the immediate at emit time.
    orr(EmitSize, Dst, Src1, Immediate << Op->BitShift);
  }
}
// Dst = Src1 | (Src2 >> BitShift), using a logical right shift.
// When Src2 is an inline constant the shift is folded into the immediate;
// otherwise the shifted-register form of orr is emitted.
DEF_OP(Orlshr) {
  auto Op = IROp->C<IR::IROp_Orlshr>();
  const uint8_t OpSize = IROp->Size;
  const auto EmitSize = OpSize == 8 ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit;

  const auto Dst = GetReg(Node);
  const auto Src1 = GetReg(Op->Src1.ID());

  uint64_t Immediate;
  const bool HasImmediate = IsInlineConstant(Op->Src2, &Immediate);
  if (!HasImmediate) {
    // Register operand: let the instruction perform the right shift.
    const auto Src2 = GetReg(Op->Src2.ID());
    orr(EmitSize, Dst, Src1, Src2, ARMEmitter::ShiftType::LSR, Op->BitShift);
  } else {
    // Constant operand: pre-shift the immediate at emit time.
    orr(EmitSize, Dst, Src1, Immediate >> Op->BitShift);
  }
}
DEF_OP(And) {
auto Op = IROp->C<IR::IROp_And>();
const uint8_t OpSize = IROp->Size;

View File

@ -845,6 +845,7 @@ CPUBackend::CompiledCode Arm64JITCore::CompileCode(uint64_t Entry,
REGISTER_OP(INLINEENTRYPOINTOFFSET, InlineEntrypointOffset);
REGISTER_OP(CYCLECOUNTER, CycleCounter);
REGISTER_OP(ADD, Add);
REGISTER_OP(TESTNZ, TestNZ);
REGISTER_OP(SUB, Sub);
REGISTER_OP(NEG, Neg);
REGISTER_OP(ABS, Abs);
@ -857,6 +858,8 @@ CPUBackend::CompiledCode Arm64JITCore::CompileCode(uint64_t Entry,
REGISTER_OP(MULH, MulH);
REGISTER_OP(UMULH, UMulH);
REGISTER_OP(OR, Or);
REGISTER_OP(ORLSHL, Orlshl);
REGISTER_OP(ORLSHR, Orlshr);
REGISTER_OP(AND, And);
REGISTER_OP(ANDN, Andn);
REGISTER_OP(XOR, Xor);
@ -1181,7 +1184,9 @@ fextl::unique_ptr<CPUBackend> CreateArm64JITCore(FEXCore::Context::ContextImpl *
CPUBackendFeatures GetArm64JITBackendFeatures() {
return CPUBackendFeatures {
.SupportsStaticRegisterAllocation = true
.SupportsStaticRegisterAllocation = true,
.SupportsShiftedBitwise = true,
.SupportsFlags = true,
};
}

View File

@ -236,6 +236,7 @@ private:
DEF_OP(InlineEntrypointOffset);
DEF_OP(CycleCounter);
DEF_OP(Add);
DEF_OP(TestNZ);
DEF_OP(Sub);
DEF_OP(Neg);
DEF_OP(Abs);
@ -248,6 +249,8 @@ private:
DEF_OP(MulH);
DEF_OP(UMulH);
DEF_OP(Or);
DEF_OP(Orlshl);
DEF_OP(Orlshr);
DEF_OP(And);
DEF_OP(Andn);
DEF_OP(Xor);

View File

@ -1051,12 +1051,20 @@ DEF_OP(FillRegister) {
DEF_OP(LoadFlag) {
auto Op = IROp->C<IR::IROp_LoadFlag>();
auto Dst = GetReg(Node);
ldrb(Dst, STATE, offsetof(FEXCore::Core::CPUState, flags[0]) + Op->Flag);
if (Op->Flag == 24 /* NZCV */)
ldr(Dst.W(), STATE, offsetof(FEXCore::Core::CPUState, flags[0]) + Op->Flag);
else
ldrb(Dst, STATE, offsetof(FEXCore::Core::CPUState, flags[0]) + Op->Flag);
}
DEF_OP(StoreFlag) {
auto Op = IROp->C<IR::IROp_StoreFlag>();
strb(GetReg(Op->Value.ID()), STATE, offsetof(FEXCore::Core::CPUState, flags[0]) + Op->Flag);
if (Op->Flag == 24 /* NZCV */)
str(GetReg(Op->Value.ID()).W(), STATE, offsetof(FEXCore::Core::CPUState, flags[0]) + Op->Flag);
else
strb(GetReg(Op->Value.ID()), STATE, offsetof(FEXCore::Core::CPUState, flags[0]) + Op->Flag);
}
FEXCore::ARMEmitter::ExtendedMemOperand Arm64JITCore::GenerateMemOperand(uint8_t AccessSize,

View File

@ -601,14 +601,22 @@ DEF_OP(LoadFlag) {
auto Op = IROp->C<IR::IROp_LoadFlag>();
auto Dst = GetDst<RA_64>(Node);
movzx(Dst, byte [STATE + (offsetof(FEXCore::Core::CPUState, flags[0]) + Op->Flag)]);
if (Op->Flag == 24 /* NZCV */)
mov(Dst.cvt32(), dword [STATE + (offsetof(FEXCore::Core::CPUState, flags[0]) + Op->Flag)]);
else
movzx(Dst, byte [STATE + (offsetof(FEXCore::Core::CPUState, flags[0]) + Op->Flag)]);
}
DEF_OP(StoreFlag) {
auto Op = IROp->C<IR::IROp_StoreFlag>();
mov (rax, GetSrc<RA_64>(Op->Value.ID()));
mov(byte [STATE + (offsetof(FEXCore::Core::CPUState, flags[0]) + Op->Flag)], al);
if (Op->Flag == 24 /* NZCV */)
mov(dword [STATE + (offsetof(FEXCore::Core::CPUState, flags[0]) + Op->Flag)], eax);
else
mov(byte [STATE + (offsetof(FEXCore::Core::CPUState, flags[0]) + Op->Flag)], al);
}
Xbyak::RegExp X86JITCore::GenerateModRM(Xbyak::Reg Base, IR::OrderedNodeWrapper Offset, IR::MemOffsetType OffsetType, uint8_t OffsetScale) const {

View File

@ -145,6 +145,7 @@ void OpDispatchBuilder::SyscallOp(OpcodeArgs) {
if (Op->TableInfo->Flags & X86Tables::InstFlags::FLAGS_BLOCK_END) {
// RIP could have been updated after coming back from the Syscall.
NewRIP = _LoadContext(GPRSize, GPRClass, offsetof(FEXCore::Core::CPUState, rip));
CalculateDeferredFlags();
_ExitFunction(NewRIP);
}
}
@ -178,6 +179,7 @@ void OpDispatchBuilder::ThunkOp(OpcodeArgs) {
// Store the new stack pointer
StoreGPRRegister(X86State::REG_RSP, NewSP);
CalculateDeferredFlags();
// Store the new RIP
_ExitFunction(NewRIP);
@ -240,6 +242,7 @@ void OpDispatchBuilder::RETOp(OpcodeArgs) {
// Store the new stack pointer
StoreGPRRegister(X86State::REG_RSP, NewSP);
CalculateDeferredFlags();
// Store the new RIP
_ExitFunction(NewRIP);
@ -304,6 +307,7 @@ void OpDispatchBuilder::IRETOp(OpcodeArgs) {
StoreGPRRegister(X86State::REG_RSP, SP);
}
CalculateDeferredFlags();
_ExitFunction(NewRIP);
BlockSetRIP = true;
}
@ -313,6 +317,7 @@ void OpDispatchBuilder::CallbackReturnOp(OpcodeArgs) {
// Store the new RIP
_CallbackReturn();
auto NewRIP = _LoadContext(GPRSize, GPRClass, offsetof(FEXCore::Core::CPUState, rip));
CalculateDeferredFlags();
// This ExitFunction won't actually get hit but needs to exist
_ExitFunction(NewRIP);
BlockSetRIP = true;
@ -810,6 +815,7 @@ void OpDispatchBuilder::CALLOp(OpcodeArgs) {
LOGMAN_THROW_A_FMT(Op->Src[0].IsLiteral(), "Had wrong operand type");
const uint64_t TargetRIP = Op->PC + Op->InstSize + Op->Src[0].Data.Literal.Value;
CalculateDeferredFlags();
if (NextRIP != TargetRIP) {
// Store the RIP
_ExitFunction(NewRIP); // If we get here then leave the function now
@ -840,6 +846,7 @@ void OpDispatchBuilder::CALLAbsoluteOp(OpcodeArgs) {
_StoreMem(GPRClass, Size, NewSP, ConstantPCReturn, Size);
// Store the RIP
CalculateDeferredFlags();
_ExitFunction(JMPPCOffset); // If we get here then leave the function now
}
@ -1124,6 +1131,7 @@ void OpDispatchBuilder::CondJUMPOp(OpcodeArgs) {
Target &= 0xFFFFFFFFU;
}
CalculateDeferredFlags();
auto TrueBlock = JumpTargets.find(Target);
auto FalseBlock = JumpTargets.find(Op->PC + Op->InstSize);
@ -1269,6 +1277,7 @@ void OpDispatchBuilder::LoopOp(OpcodeArgs) {
SrcCond = _And(SrcCond, ZF);
}
CalculateDeferredFlags();
auto TrueBlock = JumpTargets.find(Target);
auto FalseBlock = JumpTargets.find(Op->PC + Op->InstSize);
@ -1337,6 +1346,7 @@ void OpDispatchBuilder::JUMPOp(OpcodeArgs) {
TargetRIP &= 0xFFFFFFFFU;
}
CalculateDeferredFlags();
// This is just an unconditional relative literal jump
if (Multiblock) {
auto JumpBlock = JumpTargets.find(TargetRIP);
@ -1375,6 +1385,7 @@ void OpDispatchBuilder::JUMPAbsoluteOp(OpcodeArgs) {
// This uses ModRM to determine its location
// No way to use this effectively in multiblock
auto RIPOffset = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, -1);
CalculateDeferredFlags();
// Store the new RIP
_ExitFunction(RIPOffset);
@ -3407,12 +3418,14 @@ void OpDispatchBuilder::DAAOp(OpcodeArgs) {
auto AL = LoadGPRRegister(X86State::REG_RAX, 1);
SetRFLAG<FEXCore::X86State::RFLAG_CF_LOC>(_Constant(0));
CalculateDeferredFlags();
auto Cond = _Or(AF, _Select(FEXCore::IR::COND_UGT, _And(AL, _Constant(0xF)), _Constant(9), _Constant(1), _Constant(0)));
auto FalseBlock = CreateNewCodeBlockAfter(GetCurrentBlock());
auto TrueBlock = CreateNewCodeBlockAfter(FalseBlock);
auto EndBlock = CreateNewCodeBlockAfter(TrueBlock);
CalculateDeferredFlags();
_CondJump(Cond, TrueBlock, FalseBlock);
SetCurrentCodeBlock(FalseBlock);
{
@ -3431,6 +3444,7 @@ void OpDispatchBuilder::DAAOp(OpcodeArgs) {
// So Or(CF, _Constant(0)) will mean CF gets updated to the old value in the true case?
SetRFLAG<FEXCore::X86State::RFLAG_CF_LOC>(_Or(CF, NewCF));
SetRFLAG<FEXCore::X86State::RFLAG_AF_LOC>(_Constant(1));
CalculateDeferredFlags();
_Jump(EndBlock);
}
SetCurrentCodeBlock(EndBlock);
@ -3443,6 +3457,7 @@ void OpDispatchBuilder::DAAOp(OpcodeArgs) {
SetCurrentCodeBlock(FalseBlock);
{
SetRFLAG<FEXCore::X86State::RFLAG_CF_LOC>(_Constant(0));
CalculateDeferredFlags();
_Jump(EndBlock);
}
SetCurrentCodeBlock(TrueBlock);
@ -3452,6 +3467,7 @@ void OpDispatchBuilder::DAAOp(OpcodeArgs) {
auto NewAL = _Add(AL, _Constant(0x60));
StoreGPRRegister(X86State::REG_RAX, NewAL, 1);
SetRFLAG<FEXCore::X86State::RFLAG_CF_LOC>(_Constant(1));
CalculateDeferredFlags();
_Jump(EndBlock);
}
SetCurrentCodeBlock(EndBlock);
@ -3470,12 +3486,14 @@ void OpDispatchBuilder::DASOp(OpcodeArgs) {
auto AL = LoadGPRRegister(X86State::REG_RAX, 1);
SetRFLAG<FEXCore::X86State::RFLAG_CF_LOC>(_Constant(0));
CalculateDeferredFlags();
auto Cond = _Or(AF, _Select(FEXCore::IR::COND_UGT, _And(AL, _Constant(0xf)), _Constant(9), _Constant(1), _Constant(0)));
auto FalseBlock = CreateNewCodeBlockAfter(GetCurrentBlock());
auto TrueBlock = CreateNewCodeBlockAfter(FalseBlock);
auto EndBlock = CreateNewCodeBlockAfter(TrueBlock);
CalculateDeferredFlags();
_CondJump(Cond, TrueBlock, FalseBlock);
SetCurrentCodeBlock(FalseBlock);
{
@ -3494,6 +3512,7 @@ void OpDispatchBuilder::DASOp(OpcodeArgs) {
// So Or(CF, _Constant(0)) will mean CF gets updated to the old value in the true case?
SetRFLAG<FEXCore::X86State::RFLAG_CF_LOC>(_Or(CF, NewCF));
SetRFLAG<FEXCore::X86State::RFLAG_AF_LOC>(_Constant(1));
CalculateDeferredFlags();
_Jump(EndBlock);
}
SetCurrentCodeBlock(EndBlock);
@ -3506,6 +3525,7 @@ void OpDispatchBuilder::DASOp(OpcodeArgs) {
SetCurrentCodeBlock(FalseBlock);
{
SetRFLAG<FEXCore::X86State::RFLAG_CF_LOC>(_Constant(0));
CalculateDeferredFlags();
_Jump(EndBlock);
}
SetCurrentCodeBlock(TrueBlock);
@ -3514,6 +3534,7 @@ void OpDispatchBuilder::DASOp(OpcodeArgs) {
auto NewAL = _Sub(AL, _Constant(0x60));
StoreGPRRegister(X86State::REG_RAX, NewAL, 1);
SetRFLAG<FEXCore::X86State::RFLAG_CF_LOC>(_Constant(1));
CalculateDeferredFlags();
_Jump(EndBlock);
}
SetCurrentCodeBlock(EndBlock);
@ -3543,6 +3564,7 @@ void OpDispatchBuilder::AAAOp(OpcodeArgs) {
StoreGPRRegister(X86State::REG_RAX, NewAX, 2);
SetRFLAG<FEXCore::X86State::RFLAG_CF_LOC>(_Constant(0));
SetRFLAG<FEXCore::X86State::RFLAG_AF_LOC>(_Constant(0));
CalculateDeferredFlags();
_Jump(EndBlock);
}
SetCurrentCodeBlock(TrueBlock);
@ -3552,6 +3574,7 @@ void OpDispatchBuilder::AAAOp(OpcodeArgs) {
StoreGPRRegister(X86State::REG_RAX, Result, 2);
SetRFLAG<FEXCore::X86State::RFLAG_CF_LOC>(_Constant(1));
SetRFLAG<FEXCore::X86State::RFLAG_AF_LOC>(_Constant(1));
CalculateDeferredFlags();
_Jump(EndBlock);
}
SetCurrentCodeBlock(EndBlock);
@ -3576,6 +3599,7 @@ void OpDispatchBuilder::AASOp(OpcodeArgs) {
StoreGPRRegister(X86State::REG_RAX, NewAX, 2);
SetRFLAG<FEXCore::X86State::RFLAG_CF_LOC>(_Constant(0));
SetRFLAG<FEXCore::X86State::RFLAG_AF_LOC>(_Constant(0));
CalculateDeferredFlags();
_Jump(EndBlock);
}
SetCurrentCodeBlock(TrueBlock);
@ -3586,6 +3610,7 @@ void OpDispatchBuilder::AASOp(OpcodeArgs) {
StoreGPRRegister(X86State::REG_RAX, Result, 2);
SetRFLAG<FEXCore::X86State::RFLAG_CF_LOC>(_Constant(1));
SetRFLAG<FEXCore::X86State::RFLAG_AF_LOC>(_Constant(1));
CalculateDeferredFlags();
_Jump(EndBlock);
}
SetCurrentCodeBlock(EndBlock);
@ -4024,6 +4049,7 @@ void OpDispatchBuilder::CMPSOp(OpcodeArgs) {
StoreGPRRegister(X86State::REG_RSI, Dest_RSI);
OrderedNode *ZF = GetRFLAG(FEXCore::X86State::RFLAG_ZF_LOC);
CalculateDeferredFlags();
InternalCondJump = _CondJump(ZF, {REPE ? COND_NEQ : COND_EQ});
// Jump back to the start if we have more work to do
@ -4242,6 +4268,7 @@ void OpDispatchBuilder::SCASOp(OpcodeArgs) {
StoreGPRRegister(X86State::REG_RDI, TailDest_RDI);
OrderedNode *ZF = GetRFLAG(FEXCore::X86State::RFLAG_ZF_LOC);
CalculateDeferredFlags();
InternalCondJump = _CondJump(ZF, {REPE ? COND_NEQ : COND_EQ});
// Jump back to the start if we have more work to do
@ -4675,6 +4702,7 @@ void OpDispatchBuilder::CMPXCHGPairOp(OpcodeArgs) {
// Set ZF
SetRFLAG<FEXCore::X86State::RFLAG_ZF_LOC>(ZFResult);
CalculateDeferredFlags();
auto CondJump = _CondJump(ZFResult);
@ -4739,6 +4767,7 @@ void OpDispatchBuilder::Finalize() {
// We haven't emitted. Dump out to the dispatcher
SetCurrentCodeBlock(Handler.second.BlockEntry);
CalculateDeferredFlags();
_ExitFunction(_EntrypointOffset(Handler.first - Entry, GPRSize));
}
}
@ -5459,6 +5488,7 @@ void OpDispatchBuilder::INTOp(OpcodeArgs) {
if (Op->OP == 0xCE) { // Conditional to only break if Overflow == 1
auto Flag = GetRFLAG(FEXCore::X86State::RFLAG_OF_LOC);
CalculateDeferredFlags();
// If condition doesn't hold then keep going
auto CondJump = _CondJump(Flag, {COND_EQ});

View File

@ -83,6 +83,10 @@ public:
// Resets the per-block flag tracking state.
void StartNewBlock() {
  // A cached NZCV value that was loaded but never modified is simply dropped
  // here; modified values are written back by CalculateDeferredFlags.
  CachedNZCV = nullptr;

  flagsOp = SelectionFlag::Nothing;
}
bool FinishOp(uint64_t NextRIP, bool LastOp) {
@ -834,6 +838,22 @@ private:
OrderedNode* flagsOpDestSigned{};
OrderedNode* flagsOpSrcSigned{};
// Returns true when BitOffset names one of the four flags tracked in the
// packed NZCV value (CF, ZF, SF or OF), false for every other flag location.
static bool IsNZCV(unsigned BitOffset) {
  return BitOffset == FEXCore::X86State::RFLAG_CF_LOC ||
         BitOffset == FEXCore::X86State::RFLAG_ZF_LOC ||
         BitOffset == FEXCore::X86State::RFLAG_SF_LOC ||
         BitOffset == FEXCore::X86State::RFLAG_OF_LOC;
}
OrderedNode* CachedNZCV = {};
uint32_t PossiblySetNZCVBits = 0;
fextl::map<uint64_t, JumpTargetInfo> JumpTargets;
bool HandledLock{false};
bool DecodeFailure{false};
@ -1041,19 +1061,112 @@ private:
[[nodiscard]] uint32_t GetDstBitSize(X86Tables::DecodedOp Op) const;
[[nodiscard]] uint32_t GetSrcBitSize(X86Tables::DecodedOp Op) const;
// Maps an x86 flag location to its bit index inside the packed NZCV word:
// OF -> 28, CF -> 29, ZF -> 30, SF -> 31. Any other location is unreachable.
static inline constexpr unsigned IndexNZCV(unsigned BitOffset) {
  if (BitOffset == FEXCore::X86State::RFLAG_OF_LOC) return 28;
  if (BitOffset == FEXCore::X86State::RFLAG_CF_LOC) return 29;
  if (BitOffset == FEXCore::X86State::RFLAG_ZF_LOC) return 30;
  if (BitOffset == FEXCore::X86State::RFLAG_SF_LOC) return 31;
  FEX_UNREACHABLE;
}
// Returns the current packed NZCV value, loading it from the flag state on
// first use and caching the node for later calls.
OrderedNode *GetNZCV() {
  if (CachedNZCV)
    return CachedNZCV;

  // Freshly loaded flags have unknown contents, so every bit may be set.
  PossiblySetNZCVBits = ~0u;
  CachedNZCV = _LoadFlag(FEXCore::X86State::RFLAG_NZCV_LOC);
  return CachedNZCV;
}
// Replaces the cached packed NZCV value with Value.
// Only CachedNZCV is written here; PossiblySetNZCVBits is left untouched, so
// callers that change which bits may be set have to update it themselves.
void SetNZCV(OrderedNode *Value) {
CachedNZCV = Value;
}
// Sets the cached NZCV value to a constant zero and records that no NZCV bit
// can be set.
void ZeroNZCV() {
  PossiblySetNZCVBits = 0;
  CachedNZCV = _Constant(0);
}
void SetN_ZeroZCV(unsigned SrcSize, OrderedNode *Res) {
static_assert(IndexNZCV(FEXCore::X86State::RFLAG_SF_LOC) == 31);
unsigned NBit = 31;
unsigned SignBit = (SrcSize * 8) - 1;
OrderedNode *Shifted;
// Shift the sign bit into the N bit
if (SignBit > NBit)
Shifted = _Ashr(Res, _Constant(SignBit - NBit));
else if (SignBit < NBit)
Shifted = _Lshl(Res, _Constant(NBit - SignBit));
else
Shifted = Res;
// Mask off just the N bit, which now equals the sign bit
CachedNZCV = _And(Shifted, _Constant(1u << NBit));
PossiblySetNZCVBits = (1u << NBit);
}
// Replaces the cached NZCV value with N and Z derived from Res, leaving C and V
// zero. SrcSize is the operand size in bytes.
void SetNZ_ZeroCV(unsigned SrcSize, OrderedNode *Res) {
  // Fast path: TestNZ performs this natively for 32-bit and 64-bit operands
  // when the backend supports flags.
  const bool UseNativeTest = CTX->BackendFeatures.SupportsFlags && SrcSize >= 4;
  if (UseNativeTest) {
    CachedNZCV = _TestNZ(SrcSize, Res);
    PossiblySetNZCVBits = (1u << 31) | (1u << 30);
    return;
  }

  // Fallback: build the value with bit math.
  // N comes from the sign bit; this also zeroes Z, C and V.
  SetN_ZeroZCV(SrcSize, Res);

  // Z is 1 when the result equals zero, 0 otherwise.
  auto Zero = _Constant(0);
  auto One = _Constant(1);
  auto IsZero = _Select(FEXCore::IR::COND_EQ, Res, Zero, One, Zero);
  SetRFLAG<FEXCore::X86State::RFLAG_ZF_LOC>(IsZero);
}
// Builds a packed NZCV value equal to NZCV with the flag at BitOffset replaced
// by Value, and records that this flag's bit may now be set.
// The cheapest sequence is chosen from what was known about the bits before
// this insertion.
OrderedNode *InsertNZCV(OrderedNode *NZCV, unsigned BitOffset, OrderedNode *Value) {
  const unsigned Bit = IndexNZCV(BitOffset);
  const uint32_t BitMask = 1u << Bit;

  // Snapshot the tracking state before marking this bit as possibly set.
  const uint32_t PreviouslySet = PossiblySetNZCVBits;
  PossiblySetNZCVBits |= BitMask;

  // Nothing was set before: the shifted value alone is the whole result.
  if (PreviouslySet == 0) {
    return _Lshl(Value, _Constant(Bit));
  }

  // The target bit was known clear: a shifted OR is enough when supported.
  if (CTX->BackendFeatures.SupportsShiftedBitwise && (PreviouslySet & BitMask) == 0) {
    return _Orlshl(NZCV, Value, Bit);
  }

  // General case: insert the single bit over whatever was there.
  return _Bfi(4, 1, Bit, NZCV, Value);
}
template<unsigned BitOffset>
void SetRFLAG(OrderedNode *Value) {
flagsOp = SelectionFlag::Nothing;
_StoreFlag(Value, BitOffset);
SetRFLAG(Value, BitOffset);
}
void SetRFLAG(OrderedNode *Value, unsigned BitOffset) {
flagsOp = SelectionFlag::Nothing;
_StoreFlag(Value, BitOffset);
if (IsNZCV(BitOffset))
SetNZCV(InsertNZCV(GetNZCV(), BitOffset, Value));
else
_StoreFlag(Value, BitOffset);
}
OrderedNode *GetRFLAG(unsigned BitOffset) {
return _LoadFlag(BitOffset);
if (IsNZCV(BitOffset)) {
if (!CachedNZCV || (PossiblySetNZCVBits & (1u << IndexNZCV(BitOffset))))
return _Bfe(1, 1, IndexNZCV(BitOffset), GetNZCV());
else
return _Constant(0);
} else {
return _LoadFlag(BitOffset);
}
}
OrderedNode *SelectCC(uint8_t OP, OrderedNode *TrueValue, OrderedNode *FalseValue);

View File

@ -63,21 +63,56 @@ OrderedNode *OpDispatchBuilder::GetPackedRFLAG(uint32_t FlagsMask) {
// Calculate flags early.
CalculateDeferredFlags();
OrderedNode *Original = _Constant((1U << X86State::RFLAG_RESERVED_LOC) & FlagsMask ? 2 : 0);
OrderedNode *Original = _Constant(0);
// SF/ZF and N/Z are together on both arm64 and x86_64, so we special case that.
bool GetNZ = (FlagsMask & (1 << FEXCore::X86State::RFLAG_SF_LOC)) &&
(FlagsMask & (1 << FEXCore::X86State::RFLAG_ZF_LOC)) &&
CTX->BackendFeatures.SupportsShiftedBitwise;
// Handle CF first, since it's at bit 0 and hence doesn't need shift or OR.
if (FlagsMask & (1 << FEXCore::X86State::RFLAG_CF_LOC)) {
static_assert(FEXCore::X86State::RFLAG_CF_LOC == 0);
Original = GetRFLAG(FEXCore::X86State::RFLAG_CF_LOC);
}
for (size_t i = 0; i < FlagOffsets.size(); ++i) {
const auto FlagOffset = FlagOffsets[i];
if (!((1U << FlagOffset) & FlagsMask)) {
continue;
}
if ((GetNZ && (FlagOffset == FEXCore::X86State::RFLAG_SF_LOC ||
FlagOffset == FEXCore::X86State::RFLAG_ZF_LOC)) ||
FlagOffset == FEXCore::X86State::RFLAG_CF_LOC) {
// Already handled
continue;
}
// Note that the Bfi only considers the bottom bit of the flag, the rest of
// the byte is allowed to be garbage.
OrderedNode *Flag = FlagOffset == FEXCore::X86State::RFLAG_PF_LOC ?
LoadPF() :
_LoadFlag(FlagOffset);
GetRFLAG(FlagOffset);
Original = _Bfi(4, 1, FlagOffset, Original, Flag);
if (CTX->BackendFeatures.SupportsShiftedBitwise)
Original = _Orlshl(Original, Flag, FlagOffset);
else
Original = _Bfi(4, 1, FlagOffset, Original, Flag);
}
// OR in the SF/ZF flags at the end, allowing the lshr to fold with the OR
if (GetNZ) {
static_assert(FEXCore::X86State::RFLAG_SF_LOC == (FEXCore::X86State::RFLAG_ZF_LOC + 1));
auto NZCV = GetNZCV();
auto NZ = _And(NZCV, _Constant(0b11u << 30));
Original = _Orlshr(Original, NZ, 31 - FEXCore::X86State::RFLAG_SF_LOC);
}
// The constant is OR'ed in at the end, to avoid a pointless or xzr, #2.
if ((1U << X86State::RFLAG_RESERVED_LOC) & FlagsMask)
Original = _Or(Original, _Constant(2));
return Original;
}
@ -138,6 +173,10 @@ void OpDispatchBuilder::CalculatePF(OrderedNode *Res, OrderedNode *condition) {
void OpDispatchBuilder::CalculateDeferredFlags(uint32_t FlagsToCalculateMask) {
if (CurrentDeferredFlags.Type == FlagsGenerationType::TYPE_NONE) {
// Nothing to do
if (CachedNZCV)
_StoreFlag(CachedNZCV, FEXCore::X86State::RFLAG_NZCV_LOC);
CachedNZCV = nullptr;
return;
}
@ -320,6 +359,11 @@ void OpDispatchBuilder::CalculateDeferredFlags(uint32_t FlagsToCalculateMask) {
// Done calculating
CurrentDeferredFlags.Type = FlagsGenerationType::TYPE_NONE;
if (CachedNZCV)
_StoreFlag(CachedNZCV, FEXCore::X86State::RFLAG_NZCV_LOC);
CachedNZCV = nullptr;
}
void OpDispatchBuilder::CalculateFlags_ADC(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2, OrderedNode *CF) {
@ -332,20 +376,10 @@ void OpDispatchBuilder::CalculateFlags_ADC(uint8_t SrcSize, OrderedNode *Res, Or
SetRFLAG<FEXCore::X86State::RFLAG_AF_LOC>(AFRes);
}
// SF
{
auto SignOp = _Bfe(1, SrcSize * 8 - 1, Res);
SetRFLAG<FEXCore::X86State::RFLAG_SF_LOC>(SignOp);
}
CalculatePF(Res);
// ZF
{
auto SelectOp = _Select(FEXCore::IR::COND_EQ,
Res, Zero, One, Zero);
SetRFLAG<FEXCore::X86State::RFLAG_ZF_LOC>(SelectOp);
}
// SF/ZF
SetNZ_ZeroCV(SrcSize, Res);
// CF
// Unsigned
@ -371,20 +405,10 @@ void OpDispatchBuilder::CalculateFlags_SBB(uint8_t SrcSize, OrderedNode *Res, Or
SetRFLAG<FEXCore::X86State::RFLAG_AF_LOC>(AFRes);
}
// SF
{
auto SignOp = _Bfe(1, SrcSize * 8 - 1, Res);
SetRFLAG<FEXCore::X86State::RFLAG_SF_LOC>(SignOp);
}
CalculatePF(Res);
// ZF
{
auto SelectOp = _Select(FEXCore::IR::COND_EQ,
Res, Zero, One, Zero);
SetRFLAG<FEXCore::X86State::RFLAG_ZF_LOC>(SelectOp);
}
// SF/ZF
SetNZ_ZeroCV(SrcSize, Res);
// CF
// Unsigned
@ -417,20 +441,13 @@ void OpDispatchBuilder::CalculateFlags_SUB(uint8_t SrcSize, OrderedNode *Res, Or
SetRFLAG<FEXCore::X86State::RFLAG_AF_LOC>(AFRes);
}
// SF
{
auto SignOp = _Bfe(1, SrcSize * 8 - 1, Res);
SetRFLAG<FEXCore::X86State::RFLAG_SF_LOC>(SignOp);
}
CalculatePF(Res);
// ZF
{
auto SelectOp = _Select(FEXCore::IR::COND_EQ,
Res, Zero, One, Zero);
SetRFLAG<FEXCore::X86State::RFLAG_ZF_LOC>(SelectOp);
}
// Stash CF before zeroing it
auto OldCF = GetRFLAG(FEXCore::X86State::RFLAG_CF_LOC);
// SF/ZF
SetNZ_ZeroCV(SrcSize, Res);
// CF
if (UpdateCF) {
@ -438,7 +455,10 @@ void OpDispatchBuilder::CalculateFlags_SUB(uint8_t SrcSize, OrderedNode *Res, Or
Src1, Src2, One, Zero);
SetRFLAG<FEXCore::X86State::RFLAG_CF_LOC>(SelectOp);
} else {
SetRFLAG<FEXCore::X86State::RFLAG_CF_LOC>(OldCF);
}
// OF
{
auto XorOp1 = _Xor(Src1, Src2);
@ -462,25 +482,21 @@ void OpDispatchBuilder::CalculateFlags_ADD(uint8_t SrcSize, OrderedNode *Res, Or
SetRFLAG<FEXCore::X86State::RFLAG_AF_LOC>(AFRes);
}
// SF
{
auto SignOp = _Bfe(1, SrcSize * 8 - 1, Res);
SetRFLAG<FEXCore::X86State::RFLAG_SF_LOC>(SignOp);
}
CalculatePF(Res);
// ZF
{
auto SelectOp = _Select(FEXCore::IR::COND_EQ,
Res, Zero, One, Zero);
SetRFLAG<FEXCore::X86State::RFLAG_ZF_LOC>(SelectOp);
}
// Stash CF before zeroing it
auto OldCF = GetRFLAG(FEXCore::X86State::RFLAG_CF_LOC);
// SF/ZF
SetNZ_ZeroCV(SrcSize, Res);
// CF
if (UpdateCF) {
auto SelectOp = _Select(FEXCore::IR::COND_ULT, Res, Src2, One, Zero);
SetRFLAG<FEXCore::X86State::RFLAG_CF_LOC>(SelectOp);
} else {
SetRFLAG<FEXCore::X86State::RFLAG_CF_LOC>(OldCF);
}
CalculateOF_Add(SrcSize, Res, Src1, Src2);
@ -488,15 +504,12 @@ void OpDispatchBuilder::CalculateFlags_ADD(uint8_t SrcSize, OrderedNode *Res, Or
void OpDispatchBuilder::CalculateFlags_MUL(uint8_t SrcSize, OrderedNode *Res, OrderedNode *High) {
auto Zero = _Constant(0);
auto One = _Constant(1);
// PF/AF/ZF/SF
// Undefined
{
SetRFLAG<FEXCore::X86State::RFLAG_PF_LOC>(Zero);
SetRFLAG<FEXCore::X86State::RFLAG_AF_LOC>(Zero);
SetRFLAG<FEXCore::X86State::RFLAG_ZF_LOC>(Zero);
SetRFLAG<FEXCore::X86State::RFLAG_SF_LOC>(Zero);
}
// CF/OF
@ -506,24 +519,22 @@ void OpDispatchBuilder::CalculateFlags_MUL(uint8_t SrcSize, OrderedNode *Res, Or
auto SignBit = _Sbfe(1, SrcSize * 8 - 1, Res);
auto SelectOp = _Select(FEXCore::IR::COND_EQ, High, SignBit, Zero, One);
auto CV = _Constant((1u << IndexNZCV(FEXCore::X86State::RFLAG_CF_LOC)) |
(1u << IndexNZCV(FEXCore::X86State::RFLAG_OF_LOC)));
SetRFLAG<FEXCore::X86State::RFLAG_CF_LOC>(SelectOp);
SetRFLAG<FEXCore::X86State::RFLAG_OF_LOC>(SelectOp);
// Set CV accordingly and zero NZ regardless
SetNZCV(_Select(FEXCore::IR::COND_EQ, High, SignBit, Zero, CV));
}
}
void OpDispatchBuilder::CalculateFlags_UMUL(OrderedNode *High) {
auto Zero = _Constant(0);
auto One = _Constant(1);
// AF/SF/PF/ZF
// Undefined
{
SetRFLAG<FEXCore::X86State::RFLAG_AF_LOC>(Zero);
SetRFLAG<FEXCore::X86State::RFLAG_SF_LOC>(Zero);
SetRFLAG<FEXCore::X86State::RFLAG_PF_LOC>(Zero);
SetRFLAG<FEXCore::X86State::RFLAG_ZF_LOC>(Zero);
}
// CF/OF
@ -531,16 +542,15 @@ void OpDispatchBuilder::CalculateFlags_UMUL(OrderedNode *High) {
// CF and OF are set if the result of the operation can't be fit in to the destination register
// The result register will be all zero if it can't fit due to how multiplication behaves
auto SelectOp = _Select(FEXCore::IR::COND_EQ, High, Zero, Zero, One);
auto CV = _Constant((1u << IndexNZCV(FEXCore::X86State::RFLAG_CF_LOC)) |
(1u << IndexNZCV(FEXCore::X86State::RFLAG_OF_LOC)));
SetRFLAG<FEXCore::X86State::RFLAG_CF_LOC>(SelectOp);
SetRFLAG<FEXCore::X86State::RFLAG_OF_LOC>(SelectOp);
SetNZCV(_Select(FEXCore::IR::COND_EQ, High, Zero, Zero, CV));
}
}
void OpDispatchBuilder::CalculateFlags_Logical(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2) {
auto Zero = _Constant(0);
auto One = _Constant(1);
// AF
{
// Undefined
@ -548,36 +558,19 @@ void OpDispatchBuilder::CalculateFlags_Logical(uint8_t SrcSize, OrderedNode *Res
SetRFLAG<FEXCore::X86State::RFLAG_AF_LOC>(Zero);
}
// SF
{
auto SignOp = _Bfe(1, SrcSize * 8 - 1, Res);
SetRFLAG<FEXCore::X86State::RFLAG_SF_LOC>(SignOp);
}
CalculatePF(Res);
// ZF
{
auto SelectOp = _Select(FEXCore::IR::COND_EQ,
Res, Zero, One, Zero);
SetRFLAG<FEXCore::X86State::RFLAG_ZF_LOC>(SelectOp);
}
// CF/OF
{
SetRFLAG<FEXCore::X86State::RFLAG_CF_LOC>(Zero);
SetRFLAG<FEXCore::X86State::RFLAG_OF_LOC>(Zero);
}
// SF/ZF/CF/OF
SetNZ_ZeroCV(SrcSize, Res);
}
// Conditionally updates a flag: when `cond` equals Zero the flag keeps its
// current value, otherwise it becomes `newflag`.
// Expects a variable named `Zero` in the enclosing scope, and declares the
// locals `oldflag` and `newval` there, so it can be expanded at most once per
// scope.
#define COND_FLAG_SET(cond, flag, newflag) \
auto oldflag = GetRFLAG(FEXCore::X86State::flag);\
auto newval = _Select(FEXCore::IR::COND_EQ, cond, Zero, oldflag, newflag);\
SetRFLAG<FEXCore::X86State::flag>(newval);
void OpDispatchBuilder::CalculateFlags_ShiftLeft(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2) {
auto Zero = _Constant(0);
auto One = _Constant(1);
auto OldNZCV = GetNZCV();
uint32_t OldSetNZCVBits = PossiblySetNZCVBits;
SetNZ_ZeroCV(SrcSize, Res);
// CF
{
@ -585,121 +578,88 @@ void OpDispatchBuilder::CalculateFlags_ShiftLeft(uint8_t SrcSize, OrderedNode *R
auto Size = _Constant(SrcSize * 8);
auto ShiftAmt = _Sub(Size, Src2);
auto LastBit = _Bfe(1, 0, _Lshr(Src1, ShiftAmt));
COND_FLAG_SET(Src2, RFLAG_CF_LOC, LastBit);
SetRFLAG<FEXCore::X86State::RFLAG_CF_LOC>(LastBit);
}
CalculatePF(Res, Src2);
// AF
{
// Undefined
// Set to zero anyway
COND_FLAG_SET(Src2, RFLAG_AF_LOC, Zero);
}
// ZF
{
auto SelectOp = _Select(FEXCore::IR::COND_EQ,
Res, Zero, One, Zero);
COND_FLAG_SET(Src2, RFLAG_ZF_LOC, SelectOp);
}
// SF
{
auto val = _Bfe(1, SrcSize * 8 - 1, Res);
COND_FLAG_SET(Src2, RFLAG_SF_LOC, val);
}
// Undefined
// OF
{
// In the case of left shift. OF is only set from the result of <Top Source Bit> XOR <Top Result Bit>
// When Shift > 1 then OF is undefined
auto val = _Bfe(1, SrcSize * 8 - 1, _Xor(Src1, Res));
COND_FLAG_SET(Src2, RFLAG_OF_LOC, val);
SetRFLAG<FEXCore::X86State::RFLAG_OF_LOC>(val);
}
// Now select between the two
SetNZCV(_Select(FEXCore::IR::COND_EQ, Src2, Zero, OldNZCV, GetNZCV()));
PossiblySetNZCVBits |= OldSetNZCVBits;
}
void OpDispatchBuilder::CalculateFlags_ShiftRight(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2) {
auto Zero = _Constant(0);
auto One = _Constant(1);
auto OldNZCV = GetNZCV();
uint32_t OldSetNZCVBits = PossiblySetNZCVBits;
SetNZ_ZeroCV(SrcSize, Res);
// CF
{
// Extract the last bit shifted in to CF
auto ShiftAmt = _Sub(Src2, One);
auto LastBit = _Bfe(1, 0, _Lshr(Src1, ShiftAmt));
COND_FLAG_SET(Src2, RFLAG_CF_LOC, LastBit);
SetRFLAG<FEXCore::X86State::RFLAG_CF_LOC>(LastBit);
}
CalculatePF(Res, Src2);
// AF
{
// Undefined
// Set to zero anyway
COND_FLAG_SET(Src2, RFLAG_AF_LOC, Zero);
}
// ZF
{
auto SelectOp = _Select(FEXCore::IR::COND_EQ,
Res, Zero, One, Zero);
COND_FLAG_SET(Src2, RFLAG_ZF_LOC, SelectOp);
}
// SF
{
auto val =_Bfe(1, SrcSize * 8 - 1, Res);
COND_FLAG_SET(Src2, RFLAG_SF_LOC, val);
}
// Undefined
// OF
{
// Only defined when Shift is 1 else undefined
// OF flag is set if a sign change occurred
auto val = _Bfe(1, SrcSize * 8 - 1, _Xor(Src1, Res));
COND_FLAG_SET(Src2, RFLAG_OF_LOC, val);
SetRFLAG<FEXCore::X86State::RFLAG_OF_LOC>(val);
}
// Now select between the two
SetNZCV(_Select(FEXCore::IR::COND_EQ, Src2, Zero, OldNZCV, GetNZCV()));
PossiblySetNZCVBits |= OldSetNZCVBits;
}
void OpDispatchBuilder::CalculateFlags_SignShiftRight(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2) {
auto Zero = _Constant(0);
auto One = _Constant(1);
auto OldNZCV = GetNZCV();
uint32_t OldSetNZCVBits = PossiblySetNZCVBits;
// SF/ZF/OF
SetNZ_ZeroCV(SrcSize, Res);
// CF
{
// Extract the last bit shifted in to CF
auto ShiftAmt = _Sub(Src2, One);
auto LastBit = _Bfe(1, 0, _Lshr(Src1, ShiftAmt));
COND_FLAG_SET(Src2, RFLAG_CF_LOC, LastBit);
SetRFLAG<FEXCore::X86State::RFLAG_CF_LOC>(LastBit);
}
CalculatePF(Res, Src2);
// AF
{
// Undefined
// Set to zero anyway
COND_FLAG_SET(Src2, RFLAG_AF_LOC, Zero);
}
// Undefined
// ZF
{
auto SelectOp = _Select(FEXCore::IR::COND_EQ,
Res, Zero, One, Zero);
COND_FLAG_SET(Src2, RFLAG_ZF_LOC, SelectOp);
}
// SF
{
auto SignBitOp = _Bfe(1, SrcSize * 8 - 1, Res);
COND_FLAG_SET(Src2, RFLAG_SF_LOC, SignBitOp);
}
// OF
{
COND_FLAG_SET(Src2, RFLAG_OF_LOC, Zero);
}
// Now select between the two
SetNZCV(_Select(FEXCore::IR::COND_EQ, Src2, Zero, OldNZCV, GetNZCV()));
PossiblySetNZCVBits |= OldSetNZCVBits;
}
// Emits the flag updates for a left shift by a compile-time-known amount.
//   SrcSize - operand size in bytes (the top bit is read at bit SrcSize * 8 - 1)
//   Res     - result of the shift
//   Src1    - value that was shifted
//   Shift   - immediate shift amount; zero means no flag is touched
// NOTE(review): the "@ ..." lines are diff hunk headers that hide part of the body, and
// the remaining lines appear to mix the old per-flag writes with the new SetNZ_ZeroCV
// path — confirm against the actual file.
void OpDispatchBuilder::CalculateFlags_ShiftLeftImmediate(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, uint64_t Shift) {
@ -707,7 +667,8 @@ void OpDispatchBuilder::CalculateFlags_ShiftLeftImmediate(uint8_t SrcSize, Order
// A zero shift amount leaves every flag as it was.
if (Shift == 0) return;
auto Zero = _Constant(0);
auto One = _Constant(1);
SetNZ_ZeroCV(SrcSize, Res);
// CF
{
@ -728,24 +689,14 @@ void OpDispatchBuilder::CalculateFlags_ShiftLeftImmediate(uint8_t SrcSize, Order
SetRFLAG<FEXCore::X86State::RFLAG_AF_LOC>(Zero);
}
// ZF
{
// One when Res equals zero, otherwise Zero.
auto SelectOp = _Select(FEXCore::IR::COND_EQ,
Res, Zero, One, Zero);
SetRFLAG<FEXCore::X86State::RFLAG_ZF_LOC>(SelectOp);
}
// SF
{
// Top bit of the SrcSize-byte result.
auto SignOp = _Bfe(1, SrcSize * 8 - 1, Res);
SetRFLAG<FEXCore::X86State::RFLAG_SF_LOC>(SignOp);
// OF
// In the case of left shift. OF is only set from the result of <Top Source Bit> XOR <Top Result Bit>
if (Shift == 1) {
auto SourceBit = _Bfe(1, SrcSize * 8 - 1, Src1);
SetRFLAG<FEXCore::X86State::RFLAG_OF_LOC>(_Xor(SourceBit, SignOp));
}
// OF
// In the case of left shift. OF is only set from the result of <Top Source Bit> XOR <Top Result Bit>
if (Shift == 1) {
// XOR the whole values first, then extract the top bit of the XOR.
auto Xor = _Xor(Res, Src1);
auto OF = _Bfe(1, SrcSize * 8 - 1, Xor);
SetRFLAG<FEXCore::X86State::RFLAG_OF_LOC>(OF);
} else {
// Undefined, we choose to zero as part of SetNZ_ZeroCV
}
}
@ -754,7 +705,8 @@ void OpDispatchBuilder::CalculateFlags_SignShiftRightImmediate(uint8_t SrcSize,
if (Shift == 0) return;
auto Zero = _Constant(0);
auto One = _Constant(1);
SetNZ_ZeroCV(SrcSize, Res);
// CF
{
@ -771,31 +723,18 @@ void OpDispatchBuilder::CalculateFlags_SignShiftRightImmediate(uint8_t SrcSize,
SetRFLAG<FEXCore::X86State::RFLAG_AF_LOC>(Zero);
}
// ZF
{
auto SelectOp = _Select(FEXCore::IR::COND_EQ,
Res, Zero, One, Zero);
SetRFLAG<FEXCore::X86State::RFLAG_ZF_LOC>(SelectOp);
}
// SF
{
auto SignBitOp = _Bfe(1, SrcSize * 8 - 1, Res);
SetRFLAG<FEXCore::X86State::RFLAG_SF_LOC>(SignBitOp);
// OF
// Only defined when Shift is 1 else undefined
// Only is set if the top bit was set to 1 when shifted
// So it is set to same value as SF
if (Shift == 1) {
SetRFLAG<FEXCore::X86State::RFLAG_OF_LOC>(Zero);
}
}
// OF
// Only defined when Shift is 1, else undefined. It is only set if the top bit was set to 1 when
// shifted, so it is set to zero. In the undefined case we choose to zero as well. Since it was
// already zeroed there's nothing to do here.
}
// Emits flag updates for a right shift by a compile-time-known amount.
//   SrcSize - operand size in bytes (the top bit is read at bit SrcSize * 8 - 1)
//   Res     - result of the shift
//   Src1    - value that was shifted
//   Shift   - immediate shift amount
// When Shift != 1 the OF value read on entry is written back at the end, so OF is
// preserved across the SetNZ_ZeroCV call.
// NOTE(review): the "@ ..." line is a diff hunk header that hides part of the body, and
// the remaining lines appear to mix old per-flag writes with the new SetNZ_ZeroCV path —
// confirm against the actual file.
void OpDispatchBuilder::CalculateFlags_ShiftRightImmediateCommon(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, uint64_t Shift) {
auto Zero = _Constant(0);
auto One = _Constant(1);
// Stash OF before overwriting it
// (nullptr, not NULL: OldOF is a node pointer and is only read when Shift != 1)
auto OldOF = Shift != 1 ? GetRFLAG(FEXCore::X86State::RFLAG_OF_LOC) : nullptr;
SetNZ_ZeroCV(SrcSize, Res);
// CF
{
@ -812,17 +751,9 @@ void OpDispatchBuilder::CalculateFlags_ShiftRightImmediateCommon(uint8_t SrcSize
SetRFLAG<FEXCore::X86State::RFLAG_AF_LOC>(Zero);
}
// ZF
{
// One when Res equals zero, otherwise Zero.
auto SelectOp = _Select(FEXCore::IR::COND_EQ,
Res, Zero, One, Zero);
SetRFLAG<FEXCore::X86State::RFLAG_ZF_LOC>(SelectOp);
}
// SF
{
// Top bit of the SrcSize-byte result.
auto SignBitOp = _Bfe(1, SrcSize * 8 - 1, Res);
SetRFLAG<FEXCore::X86State::RFLAG_SF_LOC>(SignBitOp);
// Preserve OF if it won't be written
if (Shift != 1) {
SetRFLAG<FEXCore::X86State::RFLAG_OF_LOC>(OldOF);
}
}
@ -1000,14 +931,10 @@ void OpDispatchBuilder::CalculateFlags_BEXTR(OrderedNode *Src) {
//
// CF and OF are defined as being set to zero
//
SetRFLAG<X86State::RFLAG_CF_LOC>(Zero);
SetRFLAG<X86State::RFLAG_OF_LOC>(Zero);
// Every other flag is considered undefined after a
// BEXTR instruction, but we opt to reliably clear them.
//
SetRFLAG<X86State::RFLAG_AF_LOC>(Zero);
SetRFLAG<X86State::RFLAG_SF_LOC>(Zero);
ZeroNZCV();
// PF
if (CTX->Config.ABINoPF) {
@ -1038,7 +965,8 @@ void OpDispatchBuilder::CalculateFlags_BLSI(uint8_t SrcSize, OrderedNode *Src) {
auto Zero = _Constant(0);
auto One = _Constant(1);
SetRFLAG<X86State::RFLAG_OF_LOC>(Zero);
SetNZ_ZeroCV(SrcSize, Src);
SetRFLAG<X86State::RFLAG_AF_LOC>(Zero);
if (CTX->Config.ABINoPF) {
_InvalidateFlags(1UL << X86State::RFLAG_PF_LOC);
@ -1046,14 +974,6 @@ void OpDispatchBuilder::CalculateFlags_BLSI(uint8_t SrcSize, OrderedNode *Src) {
SetRFLAG<X86State::RFLAG_PF_LOC>(Zero);
}
// ZF
{
auto ZFOp = _Select(IR::COND_EQ,
Src, Zero,
One, Zero);
SetRFLAG<X86State::RFLAG_ZF_LOC>(ZFOp);
}
// CF
{
auto CFOp = _Select(IR::COND_EQ,
@ -1061,12 +981,6 @@ void OpDispatchBuilder::CalculateFlags_BLSI(uint8_t SrcSize, OrderedNode *Src) {
Zero, One);
SetRFLAG<X86State::RFLAG_CF_LOC>(CFOp);
}
// SF
{
auto SignOp = _Bfe(1, SrcSize * 8 - 1, Src);
SetRFLAG<X86State::RFLAG_SF_LOC>(SignOp);
}
}
void OpDispatchBuilder::CalculateFlags_BLSMSK(OrderedNode *Src) {
@ -1095,7 +1009,8 @@ void OpDispatchBuilder::CalculateFlags_BLSR(uint8_t SrcSize, OrderedNode *Result
auto Zero = _Constant(0);
auto One = _Constant(1);
SetRFLAG<X86State::RFLAG_OF_LOC>(Zero);
SetNZ_ZeroCV(SrcSize, Result);
SetRFLAG<X86State::RFLAG_AF_LOC>(Zero);
if (CTX->Config.ABINoPF) {
_InvalidateFlags(1UL << X86State::RFLAG_PF_LOC);
@ -1103,14 +1018,6 @@ void OpDispatchBuilder::CalculateFlags_BLSR(uint8_t SrcSize, OrderedNode *Result
SetRFLAG<X86State::RFLAG_PF_LOC>(Zero);
}
// ZF
{
auto ZFOp = _Select(IR::COND_EQ,
Result, Zero,
One, Zero);
SetRFLAG<X86State::RFLAG_ZF_LOC>(ZFOp);
}
// CF
{
auto CFOp = _Select(IR::COND_EQ,
@ -1118,12 +1025,6 @@ void OpDispatchBuilder::CalculateFlags_BLSR(uint8_t SrcSize, OrderedNode *Result
Zero, One);
SetRFLAG<X86State::RFLAG_CF_LOC>(CFOp);
}
// SF
{
auto SignOp = _Bfe(1, SrcSize * 8 - 1, Result);
SetRFLAG<X86State::RFLAG_SF_LOC>(SignOp);
}
}
// Emits the flag updates that follow a POPCOUNT: ZF is written from ZFResult and the
// other flags written here are set to Zero.
// NOTE(review): the "@ ..." line is a diff hunk header; the start of the ZFResult
// computation is not visible here, and the duplicated ZF write suggests old and new
// lines of the diff are interleaved — confirm against the actual file.
void OpDispatchBuilder::CalculateFlags_POPCOUNT(OrderedNode *Src) {
@ -1134,12 +1035,11 @@ void OpDispatchBuilder::CalculateFlags_POPCOUNT(OrderedNode *Src) {
_Constant(1), Zero);
// Set flags
SetRFLAG<FEXCore::X86State::RFLAG_CF_LOC>(Zero);
ZeroNZCV();
SetRFLAG<FEXCore::X86State::RFLAG_ZF_LOC>(ZFResult);
SetRFLAG<FEXCore::X86State::RFLAG_PF_LOC>(Zero);
SetRFLAG<FEXCore::X86State::RFLAG_AF_LOC>(Zero);
SetRFLAG<FEXCore::X86State::RFLAG_ZF_LOC>(ZFResult);
SetRFLAG<FEXCore::X86State::RFLAG_SF_LOC>(Zero);
SetRFLAG<FEXCore::X86State::RFLAG_OF_LOC>(Zero);
}
// Emits the flag updates that follow a BZHI.
//   SrcSize - operand size in bytes (the sign bit is read at bit SrcSize * 8 - 1)
//   Result  - value whose top bit is written to SF below
// NOTE(review): the "@ ..." line is a diff hunk header that hides most of the body
// (including how CFOp is selected), and the trailing SF block may be a removed line of
// the diff — confirm against the actual file.
void OpDispatchBuilder::CalculateFlags_BZHI(uint8_t SrcSize, OrderedNode *Result, OrderedNode *Src) {
@ -1172,16 +1072,12 @@ void OpDispatchBuilder::CalculateFlags_BZHI(uint8_t SrcSize, OrderedNode *Result
One, Zero);
SetRFLAG<X86State::RFLAG_CF_LOC>(CFOp);
}
// SF
{
// Top bit of the SrcSize-byte result.
auto SignOp = _Bfe(1, SrcSize * 8 - 1, Result);
SetRFLAG<X86State::RFLAG_SF_LOC>(SignOp);
}
}
void OpDispatchBuilder::CalculateFlags_TZCNT(OrderedNode *Src) {
// OF, SF, AF, PF all undefined
ZeroNZCV();
auto Zero = _Constant(0);
auto ZFResult = _Select(FEXCore::IR::COND_EQ,
Src, Zero,
@ -1194,6 +1090,7 @@ void OpDispatchBuilder::CalculateFlags_TZCNT(OrderedNode *Src) {
void OpDispatchBuilder::CalculateFlags_LZCNT(uint8_t SrcSize, OrderedNode *Src) {
// OF, SF, AF, PF all undefined
ZeroNZCV();
auto Zero = _Constant(0);
auto ZFResult = _Select(FEXCore::IR::COND_EQ,
@ -1207,6 +1104,7 @@ void OpDispatchBuilder::CalculateFlags_LZCNT(uint8_t SrcSize, OrderedNode *Src)
void OpDispatchBuilder::CalculateFlags_BITSELECT(OrderedNode *Src) {
// OF, SF, AF, PF, CF all undefined
ZeroNZCV();
auto ZeroConst = _Constant(0);
auto OneConst = _Constant(1);
@ -1222,11 +1120,9 @@ void OpDispatchBuilder::CalculateFlags_BITSELECT(OrderedNode *Src) {
void OpDispatchBuilder::CalculateFlags_RDRAND(OrderedNode *Src) {
// OF, SF, ZF, AF, PF all zero
// CF is set to the incoming source
ZeroNZCV();
auto ZeroConst = _Constant(0);
SetRFLAG<X86State::RFLAG_OF_LOC>(ZeroConst);
SetRFLAG<X86State::RFLAG_SF_LOC>(ZeroConst);
SetRFLAG<X86State::RFLAG_ZF_LOC>(ZeroConst);
SetRFLAG<X86State::RFLAG_AF_LOC>(ZeroConst);
SetRFLAG<X86State::RFLAG_PF_LOC>(ZeroConst);
SetRFLAG<FEXCore::X86State::RFLAG_CF_LOC>(Src);

View File

@ -782,6 +782,14 @@
"Desc": ["Integer binary or"
]
},
"GPR = Orlshl GPR:$Src1, GPR:$Src2, u8:$BitShift": {
"Desc": ["Integer binary or with logical shift left"
]
},
"GPR = Orlshr GPR:$Src1, GPR:$Src2, u8:$BitShift": {
"Desc": ["Integer binary or with logical shift right"
]
},
"GPR = Xor GPR:$Src1, GPR:$Src2": {
"Desc": ["Integer binary exclusive or"
]
@ -794,6 +802,10 @@
"Desc": ["Integer binary AND NOT. Performs the equivalent of Src1 & ~Src2"],
"DestSize": "std::max<uint8_t>(4, GetOpSize(_Src1))"
},
"GPR = TestNZ u8:$Size, GPR:$Src1": {
"Desc": ["Return NZCV for a GPR, setting N and Z accordingly and zeroing C and V"],
"DestSize": "4"
},
"GPR = Lshl u8:#Size, GPR:$Src1, GPR:$Src2": {
"Desc": ["Integer logical shift left"
],

View File

@ -690,6 +690,20 @@ bool ConstProp::ConstantPropagation(IREmitter *IREmit, const IRListView& Current
}
break;
}
case OP_TESTNZ: {
  // Constant-fold TestNZ: when the source is a known constant, replace the op with the
  // flag word it would produce (N in bit 31, Z in bit 30, every other bit zero).
  auto Op = IROp->CW<IR::IROp_TestNZ>();
  uint64_t SrcValue{};
  if (IREmit->IsValueConstant(Op->Header.Args[0], &SrcValue)) {
    // The sign bit sits at the top of the Size-byte operand.
    const bool Negative = (SrcValue & (1ull << ((Op->Size * 8) - 1))) != 0;
    const bool IsZero = SrcValue == 0;

    uint32_t Flags = 0;
    if (Negative) {
      Flags |= 1u << 31;
    }
    if (IsZero) {
      Flags |= 1u << 30;
    }

    IREmit->ReplaceWithConstant(CodeNode, Flags);
    Changed = true;
  }
  break;
}
case OP_OR: {
auto Op = IROp->CW<IR::IROp_Or>();
uint64_t Constant1{};
@ -707,6 +721,32 @@ bool ConstProp::ConstantPropagation(IREmitter *IREmit, const IRListView& Current
}
break;
}
case OP_ORLSHL: {
  // Constant-fold Orlshl when both sources are constants: Src1 | (Src2 << BitShift).
  auto Op = IROp->CW<IR::IROp_Orlshl>();
  uint64_t Lhs{};
  uint64_t Rhs{};
  // The second lookup only runs when the first source is constant.
  const bool BothConstant = IREmit->IsValueConstant(Op->Header.Args[0], &Lhs) &&
                            IREmit->IsValueConstant(Op->Header.Args[1], &Rhs);
  if (BothConstant) {
    const uint64_t Shifted = Rhs << Op->BitShift;
    const uint64_t Folded = Lhs | Shifted;
    IREmit->ReplaceWithConstant(CodeNode, Folded);
    Changed = true;
  }
  break;
}
case OP_ORLSHR: {
  // Constant-fold Orlshr when both sources are constants: Src1 | (Src2 >> BitShift).
  auto Op = IROp->CW<IR::IROp_Orlshr>();
  uint64_t Lhs{};
  uint64_t Rhs{};
  // The second lookup only runs when the first source is constant.
  const bool BothConstant = IREmit->IsValueConstant(Op->Header.Args[0], &Lhs) &&
                            IREmit->IsValueConstant(Op->Header.Args[1], &Rhs);
  if (BothConstant) {
    const uint64_t Shifted = Rhs >> Op->BitShift;
    const uint64_t Folded = Lhs | Shifted;
    IREmit->ReplaceWithConstant(CodeNode, Folded);
    Changed = true;
  }
  break;
}
case OP_XOR: {
auto Op = IROp->C<IR::IROp_Xor>();
uint64_t Constant1{};

View File

@ -35,6 +35,8 @@ namespace CodeSerialize {
namespace CPU {
// Optional capabilities of a CPU backend. Every capability is off by default.
struct CPUBackendFeatures {
  bool SupportsStaticRegisterAllocation{false};
  // NOTE(review): presumably gates use of the shifted-OR IR ops (Orlshl/Orlshr) — confirm.
  bool SupportsShiftedBitwise{false};
  // NOTE(review): presumably gates use of packed NZCV flag ops such as TestNZ — confirm.
  bool SupportsFlags{false};
};
class CPUBackend {

View File

@ -74,6 +74,13 @@ enum X86RegLocation : uint32_t {
RFLAG_VIP_LOC = 20,
RFLAG_ID_LOC = 21,
// So we can implement arm64-like flag manipulation on the interpreter/x86 jit.
// SF/ZF/CF/OF packed into a 32-bit word, matching arm64's NZCV structure (not semantics).
RFLAG_NZCV_LOC = 24,
RFLAG_NZCV_1_LOC = 25,
RFLAG_NZCV_2_LOC = 26,
RFLAG_NZCV_3_LOC = 27,
// So we can share flag handling logic, we put x87 flags after RFLAGS
X87FLAG_BASE = 32,
X87FLAG_IE_LOC = 32,