mirror of
https://github.com/FEX-Emu/FEX.git
synced 2025-02-15 12:28:36 +00:00
Arm64/VectorOps: Clamp shift amount to esize-1 for VSShr
Makes the behavior consistent with the x86 JIT. Shift amounts larger than the element size in bits minus one (e.g. larger than 31 for 32-bit elements) need to be treated as shifts by that maximum amount in order to handle sign-extending behavior properly.
This commit is contained in:
parent
fe79f61fc3
commit
4177d5c185
@ -2025,6 +2025,7 @@ DEF_OP(VSShr) {
|
||||
|
||||
const auto ElementSize = IROp->ElementSize;
|
||||
const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
|
||||
const auto MaxShift = (ElementSize * 8) - 1;
|
||||
|
||||
const auto Dst = GetVReg(Node);
|
||||
const auto ShiftVector = GetVReg(Op->ShiftVector.ID());
|
||||
@ -2040,13 +2041,21 @@ DEF_OP(VSShr) {
|
||||
if (HostSupportsSVE && Is256Bit) {
|
||||
const auto Mask = PRED_TMP_32B.Merging();
|
||||
|
||||
dup_imm(SubRegSize, VTMP2.Z(), MaxShift);
|
||||
umin(SubRegSize, VTMP2.Z(), Mask, VTMP2.Z(), ShiftVector.Z());
|
||||
|
||||
movprfx(VTMP1.Z(), Vector.Z());
|
||||
asr(SubRegSize, VTMP1.Z(), Mask, VTMP1.Z(), ShiftVector.Z());
|
||||
asr(SubRegSize, VTMP1.Z(), Mask, VTMP1.Z(), VTMP2.Z());
|
||||
mov(Dst.Z(), VTMP1.Z());
|
||||
} else {
|
||||
LOGMAN_THROW_AA_FMT(ElementSize != 8, "Adv. SIMD UMIN doesn't handle 64-bit values");
|
||||
|
||||
movi(SubRegSize, VTMP1.Q(), MaxShift);
|
||||
umin(SubRegSize, VTMP1.Q(), VTMP1.Q(), ShiftVector.Q());
|
||||
|
||||
// Need to invert shift values to perform a right shift with SSHL
|
||||
// (SSHR only has an immediate variant).
|
||||
neg(SubRegSize, VTMP1.Q(), ShiftVector.Q());
|
||||
neg(SubRegSize, VTMP1.Q(), VTMP1.Q());
|
||||
sshl(SubRegSize, Dst.Q(), Vector.Q(), VTMP1.Q());
|
||||
}
|
||||
}
|
||||
|
@ -2,8 +2,8 @@
|
||||
{
|
||||
"HostFeatures": ["AVX"],
|
||||
"RegData": {
|
||||
"XMM2": ["0xFF80000000007FFF", "0x00000000FFFFFFFF", "0x0400000000555555", "0x0000000000000000"],
|
||||
"XMM3": ["0xFF80000000007FFF", "0x00000000FFFFFFFF", "0x0400000000555555", "0x0000000000000000"],
|
||||
"XMM2": ["0xFF80000000007FFF", "0x00000000FFFFFFFF", "0x0400000000555555", "0xFFFFFFFF00000000"],
|
||||
"XMM3": ["0xFF80000000007FFF", "0x00000000FFFFFFFF", "0x0400000000555555", "0xFFFFFFFF00000000"],
|
||||
"XMM4": ["0xFF80000000007FFF", "0x00000000FFFFFFFF", "0x0000000000000000", "0x0000000000000000"],
|
||||
"XMM5": ["0xFF80000000007FFF", "0x00000000FFFFFFFF", "0x0000000000000000", "0x0000000000000000"]
|
||||
},
|
||||
@ -31,9 +31,9 @@ align 32
|
||||
dq 0x800000007FFFFFFF
|
||||
dq 0x0FFFFFFFFFFFFFFF
|
||||
dq 0x4000000055555555
|
||||
dq 0x0000000000000001
|
||||
dq 0xFFFFFFFF7FFFFFFF
|
||||
|
||||
dq 0x0000000800000010
|
||||
dq 0x0000002000000020
|
||||
dq 0x0000000400000008
|
||||
dq 0x0000000100000001
|
||||
dq 0xFFFFFFFFFFFFFFFF
|
||||
|
Loading…
x
Reference in New Issue
Block a user