mirror of
https://github.com/FEX-Emu/FEX.git
synced 2025-02-24 16:51:32 +00:00
Merge pull request #2749 from Sonicadvance1/optimize_away_redundant_masks
OpcodeDispatcher: Optimize some shifts size masking
This commit is contained in:
commit
98eda5e163
@ -1804,15 +1804,7 @@ void OpDispatchBuilder::SHLOp(OpcodeArgs) {
|
||||
}
|
||||
const auto Size = GetSrcBitSize(Op);
|
||||
|
||||
// x86 masks the shift by 0x3F or 0x1F depending on size of op
|
||||
if (Size == 64) {
|
||||
Src = _And(Src, _Constant(0x3F));
|
||||
}
|
||||
else {
|
||||
Src = _And(Src, _Constant(0x1F));
|
||||
}
|
||||
|
||||
OrderedNode *Result = _Lshl(Dest, Src);
|
||||
OrderedNode *Result = _Lshl(std::max<uint8_t>(4, GetSrcSize(Op)), Dest, Src);
|
||||
StoreResult(GPRClass, Op, Result, -1);
|
||||
|
||||
if (Size < 32) {
|
||||
@ -1866,17 +1858,7 @@ void OpDispatchBuilder::SHROp(OpcodeArgs) {
|
||||
Src = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags, -1);
|
||||
}
|
||||
|
||||
const auto Size = GetSrcBitSize(Op);
|
||||
|
||||
// x86 masks the shift by 0x3F or 0x1F depending on size of op
|
||||
if (Size == 64) {
|
||||
Src = _And(Src, _Constant(0x3F));
|
||||
}
|
||||
else {
|
||||
Src = _And(Src, _Constant(0x1F));
|
||||
}
|
||||
|
||||
auto ALUOp = _Lshr(Dest, Src);
|
||||
auto ALUOp = _Lshr(std::max<uint8_t>(4, GetSrcSize(Op)), Dest, Src);
|
||||
StoreResult(GPRClass, Op, ALUOp, -1);
|
||||
|
||||
if constexpr (SHR1Bit) {
|
||||
@ -2117,18 +2099,11 @@ void OpDispatchBuilder::ASHROp(OpcodeArgs) {
|
||||
Src = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags, -1);
|
||||
}
|
||||
|
||||
// x86 masks the shift by 0x3F or 0x1F depending on size of op
|
||||
if (Size == 64) {
|
||||
Src = _And(Src, _Constant(Size, 0x3F));
|
||||
} else {
|
||||
Src = _And(Src, _Constant(Size, 0x1F));
|
||||
}
|
||||
|
||||
if (Size < 32) {
|
||||
Dest = _Sbfe(Size, 0, Dest);
|
||||
}
|
||||
|
||||
OrderedNode *Result = _Ashr(Dest, Src);
|
||||
OrderedNode *Result = _Ashr(std::max<uint8_t>(4, GetSrcSize(Op)), Dest, Src);
|
||||
StoreResult(GPRClass, Op, Result, -1);
|
||||
|
||||
if constexpr (SHR1Bit) {
|
||||
@ -2412,29 +2387,20 @@ void OpDispatchBuilder::BMI2Shift(OpcodeArgs) {
|
||||
|
||||
auto* Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, -1);
|
||||
auto* Shift = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags, -1);
|
||||
const auto OperandSize = GetSrcBitSize(Op);
|
||||
|
||||
// x86 masks the shift by 0x3F or 0x1F depending on size of op
|
||||
auto SanitizedShift = [&] {
|
||||
if (OperandSize == 64) {
|
||||
return _And(Shift, _Constant(0x3F));
|
||||
} else {
|
||||
return _And(Shift, _Constant(0x1F));
|
||||
}
|
||||
}();
|
||||
const auto Size = GetSrcSize(Op);
|
||||
|
||||
auto* Result = [&]() -> OrderedNode* {
|
||||
// SARX
|
||||
if (Op->OP == 0x6F7) {
|
||||
return _Ashr(Src, SanitizedShift);
|
||||
return _Ashr(Size, Src, Shift);
|
||||
}
|
||||
// SHLX
|
||||
if (Op->OP == 0x5F7) {
|
||||
return _Lshl(Src, SanitizedShift);
|
||||
return _Lshl(Size, Src, Shift);
|
||||
}
|
||||
|
||||
// SHRX
|
||||
return _Lshr(Src, SanitizedShift);
|
||||
return _Lshr(Size, Src, Shift);
|
||||
}();
|
||||
|
||||
StoreResult(GPRClass, Op, Result, -1);
|
||||
@ -2631,19 +2597,12 @@ void OpDispatchBuilder::RCROp(OpcodeArgs) {
|
||||
OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, -1);
|
||||
auto CF = GetRFLAG(FEXCore::X86State::RFLAG_CF_LOC);
|
||||
|
||||
// x86 masks the shift by 0x3F or 0x1F depending on size of op
|
||||
if (Size == 64) {
|
||||
Src = _And(Src, _Constant(Size, 0x3F));
|
||||
} else {
|
||||
Src = _And(Src, _Constant(Size, 0x1F));
|
||||
}
|
||||
|
||||
// Res = Src >> Shift
|
||||
OrderedNode *Res = _Lshr(Dest, Src);
|
||||
|
||||
// Res |= (Src << (Size - Shift + 1));
|
||||
OrderedNode *SrcShl = _Sub(_Constant(Size, Size + 1), Src);
|
||||
auto TmpHigher = _Lshl(Dest, SrcShl);
|
||||
auto TmpHigher = _Lshl(GetSrcSize(Op), Dest, SrcShl);
|
||||
|
||||
auto One = _Constant(Size, 1);
|
||||
auto Zero = _Constant(Size, 0);
|
||||
@ -2780,15 +2739,8 @@ void OpDispatchBuilder::RCLOp(OpcodeArgs) {
|
||||
OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, -1);
|
||||
auto CF = GetRFLAG(FEXCore::X86State::RFLAG_CF_LOC);
|
||||
|
||||
// x86 masks the shift by 0x3F or 0x1F depending on size of op
|
||||
if (Size == 64) {
|
||||
Src = _And(Src, _Constant(Size, 0x3F));
|
||||
} else {
|
||||
Src = _And(Src, _Constant(Size, 0x1F));
|
||||
}
|
||||
|
||||
// Res = Src << Shift
|
||||
OrderedNode *Res = _Lshl(Dest, Src);
|
||||
OrderedNode *Res = _Lshl(GetSrcSize(Op), Dest, Src);
|
||||
|
||||
// Res |= (Src << (Size - Shift + 1));
|
||||
OrderedNode *SrcShl = _Sub(_Constant(Size, Size + 1), Src);
|
||||
|
21
External/FEXCore/Source/Interface/IR/IR.json
vendored
21
External/FEXCore/Source/Interface/IR/IR.json
vendored
@ -787,20 +787,29 @@
|
||||
"Desc": ["Integer binary AND NOT. Performs the equivalent of Src1 & ~Src2"],
|
||||
"DestSize": "std::max<uint8_t>(4, GetOpSize(_Src1))"
|
||||
},
|
||||
"GPR = Lshl GPR:$Src1, GPR:$Src2": {
|
||||
"GPR = Lshl u8:#Size, GPR:$Src1, GPR:$Src2": {
|
||||
"Desc": ["Integer logical shift left"
|
||||
],
|
||||
"DestSize": "std::max<uint8_t>(4, GetOpSize(_Src1))"
|
||||
"EmitValidation": [
|
||||
"Size >= 4"
|
||||
],
|
||||
"DestSize": "Size"
|
||||
},
|
||||
"GPR = Lshr GPR:$Src1, GPR:$Src2": {
|
||||
"GPR = Lshr u8:#Size, GPR:$Src1, GPR:$Src2": {
|
||||
"Desc": ["Integer logical shift right"
|
||||
],
|
||||
"DestSize": "std::max<uint8_t>(4, GetOpSize(_Src1))"
|
||||
"EmitValidation": [
|
||||
"Size >= 4"
|
||||
],
|
||||
"DestSize": "Size"
|
||||
},
|
||||
"GPR = Ashr GPR:$Src1, GPR:$Src2": {
|
||||
"GPR = Ashr u8:#Size, GPR:$Src1, GPR:$Src2": {
|
||||
"Desc": ["Integer arithmetic shift right"
|
||||
],
|
||||
"DestSize": "std::max<uint8_t>(4, GetOpSize(_Src1))"
|
||||
"EmitValidation": [
|
||||
"Size >= 4"
|
||||
],
|
||||
"DestSize": "Size"
|
||||
},
|
||||
"GPR = Ror GPR:$Src1, GPR:$Src2": {
|
||||
"Desc": ["Integer rotate right"
|
||||
|
@ -93,6 +93,15 @@ friend class FEXCore::IR::PassManager;
|
||||
// Convenience overload of _StoreMemTSO for callers that only supply an address and a value.
// Forwards to the full overload, passing Invalid() in the slot after the address,
// MEM_OFFSET_SXTX, and a trailing 1.
// NOTE(review): the last three forwarded arguments presumably mean "no offset node,
// SXTX offset type, scale 1" -- confirm against the full overload's parameter list.
IRPair<IROp_StoreMemTSO> _StoreMemTSO(FEXCore::IR::RegisterClassType Class, uint8_t Size, OrderedNode *Addr, OrderedNode *Value, uint8_t Align = 1) {
  OrderedNode *const NoOffset = Invalid();
  // The full overload takes Value before Addr, the reverse of this signature.
  return _StoreMemTSO(Class, Size, Value, Addr, NoOffset, Align, MEM_OFFSET_SXTX, 1);
}
|
||||
// Size-less form of the logical shift-left emitter.
// The operation size is taken from Src1's op size, raised to 4 if it is smaller,
// and then handed to the explicit-size _Lshl overload.
IRPair<IROp_Lshl> _Lshl(OrderedNode *Src1, OrderedNode *Src2) {
  const uint8_t OpSize = std::max<uint8_t>(4, GetOpSize(Src1));
  return _Lshl(OpSize, Src1, Src2);
}
|
||||
// Size-less form of the logical shift-right emitter.
// The operation size is taken from Src1's op size, raised to 4 if it is smaller,
// and then handed to the explicit-size _Lshr overload.
IRPair<IROp_Lshr> _Lshr(OrderedNode *Src1, OrderedNode *Src2) {
  const uint8_t OpSize = std::max<uint8_t>(4, GetOpSize(Src1));
  return _Lshr(OpSize, Src1, Src2);
}
|
||||
// Size-less form of the arithmetic shift-right emitter.
// The operation size is taken from Src1's op size, raised to 4 if it is smaller,
// and then handed to the explicit-size _Ashr overload.
IRPair<IROp_Ashr> _Ashr(OrderedNode *Src1, OrderedNode *Src2) {
  const uint8_t OpSize = std::max<uint8_t>(4, GetOpSize(Src1));
  return _Ashr(OpSize, Src1, Src2);
}
|
||||
// Accessor for the emitter's InvalidNode; used above as the placeholder argument
// when forwarding to the full _StoreMemTSO overload.
OrderedNode *Invalid() { return InvalidNode; }
|
||||
|
@ -22,14 +22,14 @@
|
||||
%AddrA i64 = Constant #0x1000000
|
||||
%MemValueA i32 = LoadMem GPR, #4, %AddrA i64, %Invalid, #4, SXTX, #1
|
||||
%Shift i64 = Constant #0x1
|
||||
%ResultA i32 = Lshl %MemValueA, %Shift
|
||||
%ResultB i64 = Lshl %MemValueA, %Shift
|
||||
%ResultA i32 = Lshl #4, %MemValueA, %Shift
|
||||
%ResultB i64 = Lshl #8, %MemValueA, %Shift
|
||||
(%Store i64) StoreRegister %ResultA i64, #0, #0x8, GPR, GPRFixed, #8
|
||||
(%Store i64) StoreRegister %ResultB i64, #0, #0x20, GPR, GPRFixed, #8
|
||||
; Constant optimisable version
|
||||
%ValueB i64 = Constant #0x87654321
|
||||
%ResultC i32 = Lshl %ValueB, %Shift
|
||||
%ResultD i64 = Lshl %ValueB, %Shift
|
||||
%ResultC i32 = Lshl #4, %ValueB, %Shift
|
||||
%ResultD i64 = Lshl #8, %ValueB, %Shift
|
||||
(%Store i64) StoreRegister %ResultC i64, #0, #0x10, GPR, GPRFixed, #8
|
||||
(%Store i64) StoreRegister %ResultD i64, #0, #0x18, GPR, GPRFixed, #8
|
||||
(%ssa7 i0) Break {0.11.0.128}
|
||||
|
Loading…
x
Reference in New Issue
Block a user