diff --git a/External/FEXCore/Source/Interface/Core/JIT/Arm64/VectorOps.cpp b/External/FEXCore/Source/Interface/Core/JIT/Arm64/VectorOps.cpp index 46ca26532..af0c2b171 100644 --- a/External/FEXCore/Source/Interface/Core/JIT/Arm64/VectorOps.cpp +++ b/External/FEXCore/Source/Interface/Core/JIT/Arm64/VectorOps.cpp @@ -2025,6 +2025,7 @@ DEF_OP(VSShr) { const auto ElementSize = IROp->ElementSize; const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + const auto MaxShift = (ElementSize * 8) - 1; const auto Dst = GetVReg(Node); const auto ShiftVector = GetVReg(Op->ShiftVector.ID()); @@ -2040,13 +2041,21 @@ DEF_OP(VSShr) { if (HostSupportsSVE && Is256Bit) { const auto Mask = PRED_TMP_32B.Merging(); + dup_imm(SubRegSize, VTMP2.Z(), MaxShift); + umin(SubRegSize, VTMP2.Z(), Mask, VTMP2.Z(), ShiftVector.Z()); + movprfx(VTMP1.Z(), Vector.Z()); - asr(SubRegSize, VTMP1.Z(), Mask, VTMP1.Z(), ShiftVector.Z()); + asr(SubRegSize, VTMP1.Z(), Mask, VTMP1.Z(), VTMP2.Z()); mov(Dst.Z(), VTMP1.Z()); } else { + LOGMAN_THROW_AA_FMT(ElementSize != 8, "Adv. SIMD UMIN doesn't handle 64-bit values"); + + movi(SubRegSize, VTMP1.Q(), MaxShift); + umin(SubRegSize, VTMP1.Q(), VTMP1.Q(), ShiftVector.Q()); + // Need to invert shift values to perform a right shift with SSHL // (SSHR only has an immediate variant). - neg(SubRegSize, VTMP1.Q(), ShiftVector.Q()); + neg(SubRegSize, VTMP1.Q(), VTMP1.Q()); sshl(SubRegSize, Dst.Q(), Vector.Q(), VTMP1.Q()); } } diff --git a/unittests/ASM/VEX/vpsravd.asm b/unittests/ASM/VEX/vpsravd.asm index e7903b755..357a581a1 100644 --- a/unittests/ASM/VEX/vpsravd.asm +++ b/unittests/ASM/VEX/vpsravd.asm @@ -2,8 +2,8 @@ { "HostFeatures": ["AVX"], "RegData": { - "XMM2": ["0xFF80000000007FFF", "0x00000000FFFFFFFF", "0x0400000000555555", "0x0000000000000000"], - "XMM3": ["0xFF80000000007FFF", "0x00000000FFFFFFFF", "0x0400000000555555", "0x0000000000000000"], + "XMM2": ["0xFF80000000007FFF", "0x00000000FFFFFFFF", "0x0400000000555555", "0xFFFFFFFF00000000"], + "XMM3": ["0xFF80000000007FFF", "0x00000000FFFFFFFF", "0x0400000000555555", "0xFFFFFFFF00000000"], "XMM4": ["0xFF80000000007FFF", "0x00000000FFFFFFFF", "0x0000000000000000", "0x0000000000000000"], "XMM5": ["0xFF80000000007FFF", "0x00000000FFFFFFFF", "0x0000000000000000", "0x0000000000000000"] }, @@ -31,9 +31,9 @@ align 32 dq 0x800000007FFFFFFF dq 0x0FFFFFFFFFFFFFFF dq 0x4000000055555555 -dq 0x0000000000000001 +dq 0xFFFFFFFF7FFFFFFF dq 0x0000000800000010 dq 0x0000002000000020 dq 0x0000000400000008 -dq 0x0000000100000001 +dq 0xFFFFFFFFFFFFFFFF