Merge pull request #2749 from Sonicadvance1/optimize_away_redundant_masks

OpcodeDispatcher: Optimize some shifts size masking
This commit is contained in:
Mai 2023-07-10 08:08:57 -04:00 committed by GitHub
commit 98eda5e163
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 37 additions and 67 deletions

View File

@ -1804,15 +1804,7 @@ void OpDispatchBuilder::SHLOp(OpcodeArgs) {
}
const auto Size = GetSrcBitSize(Op);
// x86 masks the shift by 0x3F or 0x1F depending on size of op
if (Size == 64) {
Src = _And(Src, _Constant(0x3F));
}
else {
Src = _And(Src, _Constant(0x1F));
}
OrderedNode *Result = _Lshl(Dest, Src);
OrderedNode *Result = _Lshl(std::max<uint8_t>(4, GetSrcSize(Op)), Dest, Src);
StoreResult(GPRClass, Op, Result, -1);
if (Size < 32) {
@ -1866,17 +1858,7 @@ void OpDispatchBuilder::SHROp(OpcodeArgs) {
Src = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags, -1);
}
const auto Size = GetSrcBitSize(Op);
// x86 masks the shift by 0x3F or 0x1F depending on size of op
if (Size == 64) {
Src = _And(Src, _Constant(0x3F));
}
else {
Src = _And(Src, _Constant(0x1F));
}
auto ALUOp = _Lshr(Dest, Src);
auto ALUOp = _Lshr(std::max<uint8_t>(4, GetSrcSize(Op)), Dest, Src);
StoreResult(GPRClass, Op, ALUOp, -1);
if constexpr (SHR1Bit) {
@ -2117,18 +2099,11 @@ void OpDispatchBuilder::ASHROp(OpcodeArgs) {
Src = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags, -1);
}
// x86 masks the shift by 0x3F or 0x1F depending on size of op
if (Size == 64) {
Src = _And(Src, _Constant(Size, 0x3F));
} else {
Src = _And(Src, _Constant(Size, 0x1F));
}
if (Size < 32) {
Dest = _Sbfe(Size, 0, Dest);
}
OrderedNode *Result = _Ashr(Dest, Src);
OrderedNode *Result = _Ashr(std::max<uint8_t>(4, GetSrcSize(Op)), Dest, Src);
StoreResult(GPRClass, Op, Result, -1);
if constexpr (SHR1Bit) {
@ -2412,29 +2387,20 @@ void OpDispatchBuilder::BMI2Shift(OpcodeArgs) {
auto* Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, -1);
auto* Shift = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags, -1);
const auto OperandSize = GetSrcBitSize(Op);
// x86 masks the shift by 0x3F or 0x1F depending on size of op
auto SanitizedShift = [&] {
if (OperandSize == 64) {
return _And(Shift, _Constant(0x3F));
} else {
return _And(Shift, _Constant(0x1F));
}
}();
const auto Size = GetSrcSize(Op);
auto* Result = [&]() -> OrderedNode* {
// SARX
if (Op->OP == 0x6F7) {
return _Ashr(Src, SanitizedShift);
return _Ashr(Size, Src, Shift);
}
// SHLX
if (Op->OP == 0x5F7) {
return _Lshl(Src, SanitizedShift);
return _Lshl(Size, Src, Shift);
}
// SHRX
return _Lshr(Src, SanitizedShift);
return _Lshr(Size, Src, Shift);
}();
StoreResult(GPRClass, Op, Result, -1);
@ -2631,19 +2597,12 @@ void OpDispatchBuilder::RCROp(OpcodeArgs) {
OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, -1);
auto CF = GetRFLAG(FEXCore::X86State::RFLAG_CF_LOC);
// x86 masks the shift by 0x3F or 0x1F depending on size of op
if (Size == 64) {
Src = _And(Src, _Constant(Size, 0x3F));
} else {
Src = _And(Src, _Constant(Size, 0x1F));
}
// Res = Src >> Shift
OrderedNode *Res = _Lshr(Dest, Src);
// Res |= (Src << (Size - Shift + 1));
OrderedNode *SrcShl = _Sub(_Constant(Size, Size + 1), Src);
auto TmpHigher = _Lshl(Dest, SrcShl);
auto TmpHigher = _Lshl(GetSrcSize(Op), Dest, SrcShl);
auto One = _Constant(Size, 1);
auto Zero = _Constant(Size, 0);
@ -2780,15 +2739,8 @@ void OpDispatchBuilder::RCLOp(OpcodeArgs) {
OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, -1);
auto CF = GetRFLAG(FEXCore::X86State::RFLAG_CF_LOC);
// x86 masks the shift by 0x3F or 0x1F depending on size of op
if (Size == 64) {
Src = _And(Src, _Constant(Size, 0x3F));
} else {
Src = _And(Src, _Constant(Size, 0x1F));
}
// Res = Src << Shift
OrderedNode *Res = _Lshl(Dest, Src);
OrderedNode *Res = _Lshl(GetSrcSize(Op), Dest, Src);
// Res |= (Src << (Size - Shift + 1));
OrderedNode *SrcShl = _Sub(_Constant(Size, Size + 1), Src);

View File

@ -787,20 +787,29 @@
"Desc": ["Integer binary AND NOT. Performs the equivalent of Src1 & ~Src2"],
"DestSize": "std::max<uint8_t>(4, GetOpSize(_Src1))"
},
"GPR = Lshl GPR:$Src1, GPR:$Src2": {
"GPR = Lshl u8:#Size, GPR:$Src1, GPR:$Src2": {
"Desc": ["Integer logical shift left"
],
"DestSize": "std::max<uint8_t>(4, GetOpSize(_Src1))"
"EmitValidation": [
"Size >= 4"
],
"DestSize": "Size"
},
"GPR = Lshr GPR:$Src1, GPR:$Src2": {
"GPR = Lshr u8:#Size, GPR:$Src1, GPR:$Src2": {
"Desc": ["Integer logical shift right"
],
"DestSize": "std::max<uint8_t>(4, GetOpSize(_Src1))"
"EmitValidation": [
"Size >= 4"
],
"DestSize": "Size"
},
"GPR = Ashr GPR:$Src1, GPR:$Src2": {
"GPR = Ashr u8:#Size, GPR:$Src1, GPR:$Src2": {
"Desc": ["Integer arithmetic shift right"
],
"DestSize": "std::max<uint8_t>(4, GetOpSize(_Src1))"
"EmitValidation": [
"Size >= 4"
],
"DestSize": "Size"
},
"GPR = Ror GPR:$Src1, GPR:$Src2": {
"Desc": ["Integer rotate right"

View File

@ -93,6 +93,15 @@ friend class FEXCore::IR::PassManager;
IRPair<IROp_StoreMemTSO> _StoreMemTSO(FEXCore::IR::RegisterClassType Class, uint8_t Size, OrderedNode *Addr, OrderedNode *Value, uint8_t Align = 1) {
return _StoreMemTSO(Class, Size, Value, Addr, Invalid(), Align, MEM_OFFSET_SXTX, 1);
}
// Two-operand convenience form of the logical shift left emitter.
// The operation size is taken from the first source's op size, raised to a
// minimum of 4, and then handed to the explicitly sized overload.
IRPair<IROp_Lshl> _Lshl(OrderedNode *Src1, OrderedNode *Src2) {
  const uint8_t ImplicitSize = std::max<uint8_t>(4, GetOpSize(Src1));
  return _Lshl(ImplicitSize, Src1, Src2);
}
// Two-operand convenience form of the logical shift right emitter.
// The operation size is taken from the first source's op size, raised to a
// minimum of 4, and then handed to the explicitly sized overload.
IRPair<IROp_Lshr> _Lshr(OrderedNode *Src1, OrderedNode *Src2) {
  const uint8_t ImplicitSize = std::max<uint8_t>(4, GetOpSize(Src1));
  return _Lshr(ImplicitSize, Src1, Src2);
}
// Two-operand convenience form of the arithmetic shift right emitter.
// The operation size is taken from the first source's op size, raised to a
// minimum of 4, and then handed to the explicitly sized overload.
IRPair<IROp_Ashr> _Ashr(OrderedNode *Src1, OrderedNode *Src2) {
  const uint8_t ImplicitSize = std::max<uint8_t>(4, GetOpSize(Src1));
  return _Ashr(ImplicitSize, Src1, Src2);
}
OrderedNode *Invalid() {
return InvalidNode;
}

View File

@ -22,14 +22,14 @@
%AddrA i64 = Constant #0x1000000
%MemValueA i32 = LoadMem GPR, #4, %AddrA i64, %Invalid, #4, SXTX, #1
%Shift i64 = Constant #0x1
%ResultA i32 = Lshl %MemValueA, %Shift
%ResultB i64 = Lshl %MemValueA, %Shift
%ResultA i32 = Lshl #4, %MemValueA, %Shift
%ResultB i64 = Lshl #8, %MemValueA, %Shift
(%Store i64) StoreRegister %ResultA i64, #0, #0x8, GPR, GPRFixed, #8
(%Store i64) StoreRegister %ResultB i64, #0, #0x20, GPR, GPRFixed, #8
; Constant optimisable version
%ValueB i64 = Constant #0x87654321
%ResultC i32 = Lshl %ValueB, %Shift
%ResultD i64 = Lshl %ValueB, %Shift
%ResultC i32 = Lshl #4, %ValueB, %Shift
%ResultD i64 = Lshl #8, %ValueB, %Shift
(%Store i64) StoreRegister %ResultC i64, #0, #0x10, GPR, GPRFixed, #8
(%Store i64) StoreRegister %ResultD i64, #0, #0x18, GPR, GPRFixed, #8
(%ssa7 i0) Break {0.11.0.128}