Arm64/VectorOps: Clamp shift amount to esize-1 for VSShr

Makes the behavior consistent with the x86 JIT.

We need to clamp shift amounts larger than 31 down to 31 (i.e. esize-1)
in order to preserve the sign-extending behavior of an arithmetic shift,
matching x86 semantics.
This commit is contained in:
Lioncache 2023-01-31 22:38:42 -05:00
parent fe79f61fc3
commit 4177d5c185
2 changed files with 15 additions and 6 deletions

View File

@ -2025,6 +2025,7 @@ DEF_OP(VSShr) {
const auto ElementSize = IROp->ElementSize;
const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;
const auto MaxShift = (ElementSize * 8) - 1;
const auto Dst = GetVReg(Node);
const auto ShiftVector = GetVReg(Op->ShiftVector.ID());
@ -2040,13 +2041,21 @@ DEF_OP(VSShr) {
if (HostSupportsSVE && Is256Bit) {
const auto Mask = PRED_TMP_32B.Merging();
dup_imm(SubRegSize, VTMP2.Z(), MaxShift);
umin(SubRegSize, VTMP2.Z(), Mask, VTMP2.Z(), ShiftVector.Z());
movprfx(VTMP1.Z(), Vector.Z());
asr(SubRegSize, VTMP1.Z(), Mask, VTMP1.Z(), ShiftVector.Z());
asr(SubRegSize, VTMP1.Z(), Mask, VTMP1.Z(), VTMP2.Z());
mov(Dst.Z(), VTMP1.Z());
} else {
LOGMAN_THROW_AA_FMT(ElementSize != 8, "Adv. SIMD UMIN doesn't handle 64-bit values");
movi(SubRegSize, VTMP1.Q(), MaxShift);
umin(SubRegSize, VTMP1.Q(), VTMP1.Q(), ShiftVector.Q());
// Need to invert shift values to perform a right shift with SSHL
// (SSHR only has an immediate variant).
neg(SubRegSize, VTMP1.Q(), ShiftVector.Q());
neg(SubRegSize, VTMP1.Q(), VTMP1.Q());
sshl(SubRegSize, Dst.Q(), Vector.Q(), VTMP1.Q());
}
}

View File

@ -2,8 +2,8 @@
{
"HostFeatures": ["AVX"],
"RegData": {
"XMM2": ["0xFF80000000007FFF", "0x00000000FFFFFFFF", "0x0400000000555555", "0x0000000000000000"],
"XMM3": ["0xFF80000000007FFF", "0x00000000FFFFFFFF", "0x0400000000555555", "0x0000000000000000"],
"XMM2": ["0xFF80000000007FFF", "0x00000000FFFFFFFF", "0x0400000000555555", "0xFFFFFFFF00000000"],
"XMM3": ["0xFF80000000007FFF", "0x00000000FFFFFFFF", "0x0400000000555555", "0xFFFFFFFF00000000"],
"XMM4": ["0xFF80000000007FFF", "0x00000000FFFFFFFF", "0x0000000000000000", "0x0000000000000000"],
"XMM5": ["0xFF80000000007FFF", "0x00000000FFFFFFFF", "0x0000000000000000", "0x0000000000000000"]
},
@ -31,9 +31,9 @@ align 32
dq 0x800000007FFFFFFF
dq 0x0FFFFFFFFFFFFFFF
dq 0x4000000055555555
dq 0x0000000000000001
dq 0xFFFFFFFF7FFFFFFF
dq 0x0000000800000010
dq 0x0000002000000020
dq 0x0000000400000008
dq 0x0000000100000001
dq 0xFFFFFFFFFFFFFFFF