mirror of
https://github.com/FEX-Emu/FEX.git
synced 2025-02-05 13:57:24 +00:00
Merge pull request #2208 from lioncash/zero
OpcodeDispatcher: Explicitly zero upper lanes
This commit is contained in:
commit
e9aa368a62
@ -4971,12 +4971,6 @@ OrderedNode *OpDispatchBuilder::LoadSource_WithOpSize(FEXCore::IR::RegisterClass
|
||||
if (OpSize < Core::CPUState::XMM_SSE_REG_SIZE) {
|
||||
Src = _VMov(OpSize, Src);
|
||||
}
|
||||
|
||||
// OpSize of 16 is special in that it is expected to zero the upper bits of the 256-bit operation.
|
||||
// TODO: Longer term we should enforce the difference between zero and insert.
|
||||
if (regSize == Core::CPUState::XMM_AVX_REG_SIZE && OpSize == Core::CPUState::XMM_SSE_REG_SIZE) {
|
||||
Src = _VMov(OpSize, Src);
|
||||
}
|
||||
}
|
||||
else {
|
||||
Src = _LoadRegister(false, offsetof(FEXCore::Core::CPUState, gregs[gpr]) + (highIndex ? 1 : 0), GPRClass, GPRFixedClass, OpSize);
|
||||
|
@ -35,11 +35,12 @@ void OpDispatchBuilder::MOVVectorNTOp(OpcodeArgs) {
|
||||
|
||||
// Moves a vector value from Src[0] to the destination operand, tagging both
// the load and the store with MemoryAccessType::ACCESS_STREAM.
//
// When the destination reports IsGPR() and the destination size equals
// Core::CPUState::XMM_SSE_REG_SIZE, the loaded value is first passed through
// _VMov(16, ...) so that the value written back has its upper lane cleared.
// NOTE(review): IsGPR() presumably distinguishes a register destination from
// a memory destination here -- confirm against the operand definition.
void OpDispatchBuilder::VMOVVectorNTOp(OpcodeArgs) {
|
||||
// Load the source as an FPR-class value using the streaming access type.
OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, 1, true, false, MemoryAccessType::ACCESS_STREAM);
|
||||
// True when the destination size matches the SSE (XMM) register size.
const auto Is128BitDest = GetDstSize(Op) == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||

||||
// TODO: When stores and loads gain the ability to explicitly express
|
||||
// whether a vector extension or an insert is desirable, ensure
|
||||
// the 128-bit case here is a zero extend on store if the destination
|
||||
// is a register.
|
||||
// NOTE(review): this view renders a diff without +/- markers, so the TODO
// above may be the pre-change text that the explicit _VMov below replaces --
// confirm against the repository before relying on it.
if (Op->Dest.IsGPR() && Is128BitDest) {
|
||||
// Clear the upper lane
|
||||
Src = _VMov(16, Src);
|
||||
}
|
||||

||||
// Write the (possibly upper-lane-cleared) value back with the streaming access type.
StoreResult(FPRClass, Op, Src, 1, MemoryAccessType::ACCESS_STREAM);
|
||||
}
|
||||
@ -54,11 +55,11 @@ void OpDispatchBuilder::VMOVAPS_VMOVAPD_Op(OpcodeArgs) {
|
||||
const auto Is128BitDest = GetDstSize(Op) == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||
|
||||
if (Op->Dest.IsGPR() && Is128BitDest) {
|
||||
// Perform 32 byte store to clear the upper lane.
|
||||
StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Src, 32, -1);
|
||||
} else {
|
||||
StoreResult(FPRClass, Op, Src, -1);
|
||||
// Clear the upper lane
|
||||
Src = _VMov(16, Src);
|
||||
}
|
||||
|
||||
StoreResult(FPRClass, Op, Src, -1);
|
||||
}
|
||||
|
||||
void OpDispatchBuilder::VMOVUPS_VMOVUPD_Op(OpcodeArgs) {
|
||||
@ -66,11 +67,11 @@ void OpDispatchBuilder::VMOVUPS_VMOVUPD_Op(OpcodeArgs) {
|
||||
const auto Is128BitDest = GetDstSize(Op) == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||
|
||||
if (Op->Dest.IsGPR() && Is128BitDest) {
|
||||
// Perform 32 byte store to clear the upper lane.
|
||||
StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Src, 32, 1);
|
||||
} else {
|
||||
StoreResult(FPRClass, Op, Src, 1);
|
||||
// Clear the upper lane
|
||||
Src = _VMov(16, Src);
|
||||
}
|
||||
|
||||
StoreResult(FPRClass, Op, Src, 1);
|
||||
}
|
||||
|
||||
void OpDispatchBuilder::MOVUPSOp(OpcodeArgs) {
|
||||
@ -108,7 +109,11 @@ void OpDispatchBuilder::VMOVHPOp(OpcodeArgs) {
|
||||
OrderedNode *Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, 16);
|
||||
OrderedNode *Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags, 8);
|
||||
OrderedNode *Result = _VInsElement(16, 8, 1, 0, Src1, Src2);
|
||||
StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, 32, -1);
|
||||
|
||||
// Clear the upper lane.
|
||||
Result = _VMov(16, Result);
|
||||
|
||||
StoreResult(FPRClass, Op, Result, -1);
|
||||
} else {
|
||||
OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, 16);
|
||||
OrderedNode *Result = _VInsElement(16, 8, 0, 1, Src, Src);
|
||||
@ -142,7 +147,11 @@ void OpDispatchBuilder::VMOVLPOp(OpcodeArgs) {
|
||||
OrderedNode *Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, 16);
|
||||
OrderedNode *Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags, 8);
|
||||
OrderedNode *Result = _VInsElement(16, 8, 0, 0, Src1, Src2);
|
||||
StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, 32, -1);
|
||||
|
||||
// Clear the upper lane.
|
||||
Result = _VMov(16, Result);
|
||||
|
||||
StoreResult(FPRClass, Op, Result, -1);
|
||||
} else {
|
||||
OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, 8);
|
||||
StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Src, 8, 8);
|
||||
@ -166,9 +175,12 @@ void OpDispatchBuilder::VMOVSHDUPOp(OpcodeArgs) {
|
||||
if (Is256Bit) {
|
||||
Result = _VInsElement(SrcSize, 4, 4, 5, Result, Src);
|
||||
Result = _VInsElement(SrcSize, 4, 6, 7, Result, Src);
|
||||
} else {
|
||||
// Clear upper lane
|
||||
Result = _VMov(16, Result);
|
||||
}
|
||||
|
||||
StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, 32, -1);
|
||||
StoreResult(FPRClass, Op, Result, -1);
|
||||
}
|
||||
|
||||
void OpDispatchBuilder::MOVSLDUPOp(OpcodeArgs) {
|
||||
@ -188,9 +200,12 @@ void OpDispatchBuilder::VMOVSLDUPOp(OpcodeArgs) {
|
||||
if (Is256Bit) {
|
||||
Result = _VInsElement(SrcSize, 4, 5, 4, Result, Src);
|
||||
Result = _VInsElement(SrcSize, 4, 7, 6, Result, Src);
|
||||
} else {
|
||||
// Clear upper lane
|
||||
Result = _VMov(16, Result);
|
||||
}
|
||||
|
||||
StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, 32, -1);
|
||||
StoreResult(FPRClass, Op, Result, -1);
|
||||
}
|
||||
|
||||
void OpDispatchBuilder::MOVSSOp(OpcodeArgs) {
|
||||
@ -1134,9 +1149,12 @@ void OpDispatchBuilder::VMOVDDUPOp(OpcodeArgs) {
|
||||
OrderedNode *Res = _VInsElement(SrcSize, 8, 1, 0, Src, Src);
|
||||
if (Is256Bit) {
|
||||
Res = _VInsElement(SrcSize, 8, 3, 2, Res, Src);
|
||||
} else {
|
||||
// Clear the upper lane
|
||||
Res = _VMov(16, Res);
|
||||
}
|
||||
|
||||
StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Res, 32, -1);
|
||||
StoreResult(FPRClass, Op, Res, -1);
|
||||
}
|
||||
|
||||
template<size_t DstElementSize>
|
||||
|
Loading…
x
Reference in New Issue
Block a user