IR: handle 8/16-bit AddNZCV/SubNZCV

We can do this more efficiently than the current software lowering.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
This commit is contained in:
Alyssa Rosenzweig 2024-02-08 14:15:16 -04:00
parent 780b48620b
commit d7ff1b78fb
4 changed files with 52 additions and 61 deletions

View File

@ -97,16 +97,24 @@ DEF_OP(AddShift) {
DEF_OP(AddNZCV) {
auto Op = IROp->C<IR::IROp_AddNZCV>();
const auto OpSize = IROp->Size;
const uint8_t OpSize = IROp->Size;
LOGMAN_THROW_AA_FMT(OpSize == IR::i32Bit || OpSize == IR::i64Bit, "Unsupported {} size: {}", __func__, OpSize);
const auto EmitSize = OpSize == IR::i64Bit ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit;
auto Src1 = GetReg(Op->Src1.ID());
uint64_t Const;
if (IsInlineConstant(Op->Src2, &Const)) {
cmn(EmitSize, GetReg(Op->Src1.ID()), Const);
LOGMAN_THROW_AA_FMT(OpSize >= 4, "Constant not allowed here");
cmn(EmitSize, Src1, Const);
} else {
cmn(EmitSize, GetReg(Op->Src1.ID()), GetReg(Op->Src2.ID()));
unsigned Shift = OpSize < 4 ? (32 - (8 * OpSize)) : 0;
if (OpSize < 4) {
lsl(ARMEmitter::Size::i32Bit, TMP1, Src1, Shift);
cmn(EmitSize, TMP1, GetReg(Op->Src2.ID()), ARMEmitter::ShiftType::LSL, Shift);
} else {
cmn(EmitSize, Src1, GetReg(Op->Src2.ID()));
}
}
}
@ -194,19 +202,35 @@ DEF_OP(SubShift) {
DEF_OP(SubNZCV) {
auto Op = IROp->C<IR::IROp_SubNZCV>();
const auto OpSize = IROp->Size;
const uint8_t OpSize = IROp->Size;
LOGMAN_THROW_AA_FMT(OpSize == IR::i32Bit || OpSize == IR::i64Bit, "Unsupported {} size: {}", __func__, OpSize);
const auto EmitSize = OpSize == IR::i64Bit ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit;
uint64_t Const;
if (IsInlineConstant(Op->Src2, &Const)) {
LOGMAN_THROW_AA_FMT(OpSize >= 4, "Constant not allowed here");
cmp(EmitSize, GetReg(Op->Src1.ID()), Const);
} else if (IsInlineConstant(Op->Src1, &Const)) {
LOGMAN_THROW_AA_FMT(Const == 0, "Only valid constant");
cmp(EmitSize, ARMEmitter::Reg::zr, GetReg(Op->Src2.ID()));
} else {
cmp(EmitSize, GetReg(Op->Src1.ID()), GetReg(Op->Src2.ID()));
unsigned Shift = OpSize < 4 ? (32 - (8 * OpSize)) : 0;
ARMEmitter::Register ShiftedSrc1 = ARMEmitter::Reg::zr;
if (IsInlineConstant(Op->Src1, &Const)) {
LOGMAN_THROW_AA_FMT(Const == 0, "Only valid constant");
// Any shift of zero is still zero
} else {
ShiftedSrc1 = GetReg(Op->Src1.ID());
if (OpSize < 4) {
lsl(ARMEmitter::Size::i32Bit, TMP1, ShiftedSrc1, Shift);
ShiftedSrc1 = TMP1;
}
}
if (OpSize < 4) {
cmp(EmitSize, ShiftedSrc1, GetReg(Op->Src2.ID()), ARMEmitter::ShiftType::LSL, Shift);
} else {
cmp(EmitSize, ShiftedSrc1, GetReg(Op->Src2.ID()));
}
}
}

View File

@ -568,31 +568,16 @@ void OpDispatchBuilder::CalculateFlags_SUB(uint8_t SrcSize, OrderedNode *Res, Or
// Stash CF before stomping over it
auto OldCF = UpdateCF ? nullptr : GetRFLAG(FEXCore::X86State::RFLAG_CF_RAW_LOC);
// TODO: Could do this path for small sources if we have FEAT_FlagM
if (SrcSize >= 4) {
_SubNZCV(OpSize, Src1, Src2);
CachedNZCV = nullptr;
NZCVDirty = false;
PossiblySetNZCVBits = ~0;
_SubNZCV(IR::SizeToOpSize(SrcSize), Src1, Src2);
CachedNZCV = nullptr;
NZCVDirty = false;
PossiblySetNZCVBits = ~0;
// We only bother inverting CF if we're actually going to update CF.
if (UpdateCF)
CarryInvert();
} else {
// SF/ZF
SetNZ_ZeroCV(SrcSize, Res);
// CF
if (UpdateCF) {
// Grab carry bit from unmasked output.
SetRFLAG<FEXCore::X86State::RFLAG_CF_RAW_LOC>(Res, SrcSize * 8, true);
}
CalculateOF(SrcSize, Res, Src1, Src2, true);
}
// We stomped over CF while calculation flags, restore it.
if (!UpdateCF)
// If we're updating CF, we need to invert it for correctness. If we're not
// updating CF, we need to restore the CF since we stomped over it.
if (UpdateCF)
CarryInvert();
else
SetRFLAG<FEXCore::X86State::RFLAG_CF_RAW_LOC>(OldCF);
}
@ -605,24 +590,10 @@ void OpDispatchBuilder::CalculateFlags_ADD(uint8_t SrcSize, OrderedNode *Res, Or
// Stash CF before stomping over it
auto OldCF = UpdateCF ? nullptr : GetRFLAG(FEXCore::X86State::RFLAG_CF_RAW_LOC);
// TODO: Could do this path for small sources if we have FEAT_FlagM
if (SrcSize >= 4) {
_AddNZCV(OpSize, Src1, Src2);
CachedNZCV = nullptr;
NZCVDirty = false;
PossiblySetNZCVBits = ~0;
} else {
// SF/ZF
SetNZ_ZeroCV(SrcSize, Res);
// CF
if (UpdateCF) {
// Grab carry bit from unmasked output
SetRFLAG<FEXCore::X86State::RFLAG_CF_RAW_LOC>(Res, SrcSize * 8, true);
}
CalculateOF(SrcSize, Res, Src1, Src2, false);
}
_AddNZCV(IR::SizeToOpSize(SrcSize), Src1, Src2);
CachedNZCV = nullptr;
NZCVDirty = false;
PossiblySetNZCVBits = ~0;
// We stomped over CF while calculation flags, restore it.
if (!UpdateCF)

View File

@ -964,10 +964,7 @@
"AddNZCV OpSize:#Size, GPR:$Src1, GPR:$Src2": {
"Desc": ["Set NZCV for the sum of two GPRs"],
"HasSideEffects": true,
"DestSize": "Size",
"EmitValidation": [
"Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
]
"DestSize": "Size"
},
"CarryInvert": {
"Desc": ["Invert carry flag in NZCV"],
@ -1029,10 +1026,7 @@
"Carry flag uses arm64 definition, inverted x86.",
""],
"DestSize": "Size",
"HasSideEffects": true,
"EmitValidation": [
"Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
]
"HasSideEffects": true
},
"GPR = Or OpSize:#Size, GPR:$Src1, GPR:$Src2": {
"Desc": ["Integer binary or"

View File

@ -970,7 +970,9 @@ bool ConstProp::ConstantInlining(IREmitter *IREmit, const IRListView& CurrentIR)
uint64_t Constant2{};
if (IREmit->IsValueConstant(Op->Header.Args[1], &Constant2)) {
if (IsImmAddSub(Constant2)) {
// We don't allow 8/16-bit operations to have constants, since no
// constant would be in bounds after the JIT's 24/16 shift.
if (IsImmAddSub(Constant2) && Op->Header.Size >= 4) {
IREmit->SetWriteCursor(CurrentIR.GetNode(Op->Header.Args[1]));
IREmit->ReplaceNodeArgument(CodeNode, 1, CreateInlineConstant(IREmit, Constant2));