mirror of
https://github.com/FEX-Emu/FEX.git
synced 2025-01-08 22:52:51 +00:00
VectorOps: Handle SVE VFCADD a little better
If the destination register aliases neither source register, we can move the first source directly into the destination (via MOVPRFX) and perform the FCADD in place, instead of computing the result in a temporary and then copying it into the destination.
This commit is contained in:
parent
8f246b206b
commit
24f2796141
@ -4963,13 +4963,16 @@ DEF_OP(VFCADD) {
|
||||
// Trivial case where we already have first vector in the destination
|
||||
// register. We can just do the operation in place.
|
||||
fcadd(SubRegSize, Dst.Z(), Mask, Vector1.Z(), Vector2.Z(), Rotate);
|
||||
}
|
||||
else {
|
||||
} else if (Dst == Vector2) {
|
||||
// SVE FCADD is a destructive operation, so we need
|
||||
// a temporary for performing operations.
|
||||
movprfx(VTMP1.Z(), Vector1.Z());
|
||||
fcadd(SubRegSize, VTMP1.Z(), Mask, VTMP1.Z(), Vector2.Z(), Rotate);
|
||||
mov(Dst.Z(), VTMP1.Z());
|
||||
} else {
|
||||
// We have no source/dest aliasing, so we can move into the destination.
|
||||
movprfx(Dst.Z(), Vector1.Z());
|
||||
fcadd(SubRegSize, Dst.Z(), Mask, Dst.Z(), Vector2.Z(), Rotate);
|
||||
}
|
||||
} else {
|
||||
if (OpSize == 8) {
|
||||
|
@ -2,17 +2,16 @@
|
||||
{
|
||||
"HostFeatures": ["AVX"],
|
||||
"RegData": {
|
||||
"XMM2": ["0xBFF0000000000000", "0x4008000000000000", "0x0000000000000000", "0x0000000000000000"],
|
||||
"XMM3": ["0xBFF0000000000000", "0x4008000000000000", "0x0000000000000000", "0x0000000000000000"],
|
||||
"XMM4": ["0x3FF0000000000000", "0x4008000000000000", "0x0000000000000000", "0x0000000000000000"],
|
||||
"XMM5": ["0x3FF0000000000000", "0x4008000000000000", "0x0000000000000000", "0x0000000000000000"],
|
||||
"XMM6": ["0xBFF0000000000000", "0x4008000000000000", "0xBFF0000000000000", "0x4008000000000000"],
|
||||
"XMM7": ["0xBFF0000000000000", "0x4008000000000000", "0xBFF0000000000000", "0x4008000000000000"],
|
||||
"XMM8": ["0x3FF0000000000000", "0x4008000000000000", "0x3FF0000000000000", "0x4008000000000000"],
|
||||
"XMM9": ["0x3FF0000000000000", "0x4008000000000000", "0x3FF0000000000000", "0x4008000000000000"]
|
||||
},
|
||||
"MemoryRegions": {
|
||||
"0x100000000": "4096"
|
||||
"XMM2" : ["0xBFF0000000000000", "0x4008000000000000", "0x0000000000000000", "0x0000000000000000"],
|
||||
"XMM3" : ["0xBFF0000000000000", "0x4008000000000000", "0x0000000000000000", "0x0000000000000000"],
|
||||
"XMM4" : ["0x3FF0000000000000", "0x4008000000000000", "0x0000000000000000", "0x0000000000000000"],
|
||||
"XMM5" : ["0x3FF0000000000000", "0x4008000000000000", "0x0000000000000000", "0x0000000000000000"],
|
||||
"XMM6" : ["0xBFF0000000000000", "0x4008000000000000", "0xBFF0000000000000", "0x4008000000000000"],
|
||||
"XMM7" : ["0xBFF0000000000000", "0x4008000000000000", "0xBFF0000000000000", "0x4008000000000000"],
|
||||
"XMM8" : ["0x3FF0000000000000", "0x4008000000000000", "0x3FF0000000000000", "0x4008000000000000"],
|
||||
"XMM9" : ["0x3FF0000000000000", "0x4008000000000000", "0x3FF0000000000000", "0x4008000000000000"],
|
||||
"XMM10": ["0xBFF0000000000000", "0x4008000000000000", "0xBFF0000000000000", "0x4008000000000000"],
|
||||
"XMM11": ["0xBFF0000000000000", "0x4008000000000000", "0xBFF0000000000000", "0x4008000000000000"]
|
||||
}
|
||||
}
|
||||
%endif
|
||||
@ -34,6 +33,13 @@ vaddsubpd ymm7, ymm0, ymm1
|
||||
vaddsubpd ymm8, ymm1, [rdx]
|
||||
vaddsubpd ymm9, ymm1, ymm0
|
||||
|
||||
; Aliasing source/destination vectors
|
||||
vmovapd ymm10, [rdx]
|
||||
vaddsubpd ymm10, ymm10, ymm1
|
||||
|
||||
vmovapd ymm11, [rdx + 32]
|
||||
vaddsubpd ymm11, ymm0, ymm11
|
||||
|
||||
hlt
|
||||
|
||||
align 32
|
||||
|
@ -5,10 +5,9 @@
|
||||
"XMM2": ["0x41200000C0000000", "0x41200000C0C00000", "0x0000000000000000", "0x0000000000000000"],
|
||||
"XMM3": ["0x41200000C0000000", "0x41200000C0C00000", "0x41200000C0000000", "0x41200000C0C00000"],
|
||||
"XMM4": ["0x41200000C0000000", "0x41200000C0C00000", "0x0000000000000000", "0x0000000000000000"],
|
||||
"XMM5": ["0x41200000C0000000", "0x41200000C0C00000", "0x41200000C0000000", "0x41200000C0C00000"]
|
||||
},
|
||||
"MemoryRegions": {
|
||||
"0x100000000": "4096"
|
||||
"XMM5": ["0x41200000C0000000", "0x41200000C0C00000", "0x41200000C0000000", "0x41200000C0C00000"],
|
||||
"XMM6": ["0x41200000C0000000", "0x41200000C0C00000", "0x41200000C0000000", "0x41200000C0C00000"],
|
||||
"XMM7": ["0x41200000C0000000", "0x41200000C0C00000", "0x41200000C0000000", "0x41200000C0C00000"]
|
||||
}
|
||||
}
|
||||
%endif
|
||||
@ -24,6 +23,13 @@ vaddsubps ymm3, ymm0, [rdx + 32]
|
||||
vaddsubps xmm4, xmm0, xmm1
|
||||
vaddsubps ymm5, ymm0, ymm1
|
||||
|
||||
; Aliasing source/destination vectors
|
||||
vmovapd ymm6, [rdx]
|
||||
vaddsubps ymm6, ymm6, ymm1
|
||||
|
||||
vmovapd ymm7, [rdx + 32]
|
||||
vaddsubps ymm7, ymm0, ymm7
|
||||
|
||||
hlt
|
||||
|
||||
align 32
|
||||
|
@ -20,8 +20,37 @@
|
||||
"fcadd v16.2d, v17.2d, v2.2d, #90"
|
||||
]
|
||||
},
|
||||
"vaddsubpd ymm0, ymm0, ymm2": {
|
||||
"ExpectedInstructionCount": 4,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"Aliasing source and destination",
|
||||
"Map 1 0b01 0xd0 256-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"movprfx z1, z18",
|
||||
"ext z1.b, z1.b, z18.b, #8",
|
||||
"mov z2.d, z1.d",
|
||||
"fcadd z16.d, p7/m, z16.d, z2.d, #90"
|
||||
]
|
||||
},
|
||||
"vaddsubpd ymm0, ymm1, ymm0": {
|
||||
"ExpectedInstructionCount": 5,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"Aliasing source and destination",
|
||||
"Map 1 0b01 0xd0 256-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"movprfx z1, z16",
|
||||
"ext z1.b, z1.b, z16.b, #8",
|
||||
"mov z2.d, z1.d",
|
||||
"movprfx z16, z17",
|
||||
"fcadd z16.d, p7/m, z16.d, z2.d, #90"
|
||||
]
|
||||
},
|
||||
"vaddsubpd ymm0, ymm1, ymm2": {
|
||||
"ExpectedInstructionCount": 6,
|
||||
"ExpectedInstructionCount": 5,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"Map 1 0b01 0xd0 256-bit"
|
||||
@ -30,9 +59,8 @@
|
||||
"movprfx z1, z18",
|
||||
"ext z1.b, z1.b, z18.b, #8",
|
||||
"mov z2.d, z1.d",
|
||||
"movprfx z0, z17",
|
||||
"fcadd z0.d, p7/m, z0.d, z2.d, #90",
|
||||
"mov z16.d, z0.d"
|
||||
"movprfx z16, z17",
|
||||
"fcadd z16.d, p7/m, z16.d, z2.d, #90"
|
||||
]
|
||||
},
|
||||
"vaddsubps xmm0, xmm1, xmm2": {
|
||||
@ -46,17 +74,41 @@
|
||||
"fcadd v16.4s, v17.4s, v2.4s, #90"
|
||||
]
|
||||
},
|
||||
"vaddsubps ymm0, ymm1, ymm0": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"Aliasing source and destination",
|
||||
"Map 1 0b11 0xd0 256-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"revw z2.d, p7/m, z16.d",
|
||||
"movprfx z16, z17",
|
||||
"fcadd z16.s, p7/m, z16.s, z2.s, #90"
|
||||
]
|
||||
},
|
||||
"vaddsubps ymm0, ymm0, ymm2": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"Aliasing source and destination",
|
||||
"Map 1 0b11 0xd0 256-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"revw z2.d, p7/m, z18.d",
|
||||
"fcadd z16.s, p7/m, z16.s, z2.s, #90"
|
||||
]
|
||||
},
|
||||
"vaddsubps ymm0, ymm1, ymm2": {
|
||||
"ExpectedInstructionCount": 4,
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"Map 1 0b11 0xd0 256-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"revw z2.d, p7/m, z18.d",
|
||||
"movprfx z0, z17",
|
||||
"fcadd z0.s, p7/m, z0.s, z2.s, #90",
|
||||
"mov z16.d, z0.d"
|
||||
"movprfx z16, z17",
|
||||
"fcadd z16.s, p7/m, z16.s, z2.s, #90"
|
||||
]
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user