FEX/unittests/ASM/VEX/vaddsubpd.asm
Lioncache 24f2796141 VectorOps: Handle SVE VFCADD a little better
If no registers alias, then we can move the first source directly into the
destination and then perform the FCADD operation as opposed to using a
temporary.
2023-10-19 14:48:46 +02:00

56 lines
1.7 KiB
NASM

%ifdef CONFIG
{
"HostFeatures": ["AVX"],
"RegData": {
"XMM2" : ["0xBFF0000000000000", "0x4008000000000000", "0x0000000000000000", "0x0000000000000000"],
"XMM3" : ["0xBFF0000000000000", "0x4008000000000000", "0x0000000000000000", "0x0000000000000000"],
"XMM4" : ["0x3FF0000000000000", "0x4008000000000000", "0x0000000000000000", "0x0000000000000000"],
"XMM5" : ["0x3FF0000000000000", "0x4008000000000000", "0x0000000000000000", "0x0000000000000000"],
"XMM6" : ["0xBFF0000000000000", "0x4008000000000000", "0xBFF0000000000000", "0x4008000000000000"],
"XMM7" : ["0xBFF0000000000000", "0x4008000000000000", "0xBFF0000000000000", "0x4008000000000000"],
"XMM8" : ["0x3FF0000000000000", "0x4008000000000000", "0x3FF0000000000000", "0x4008000000000000"],
"XMM9" : ["0x3FF0000000000000", "0x4008000000000000", "0x3FF0000000000000", "0x4008000000000000"],
"XMM10": ["0xBFF0000000000000", "0x4008000000000000", "0xBFF0000000000000", "0x4008000000000000"],
"XMM11": ["0xBFF0000000000000", "0x4008000000000000", "0xBFF0000000000000", "0x4008000000000000"]
}
}
%endif

;-----------------------------------------------------------------------
; VADDSUBPD, VEX-encoded, 128-bit (xmm) and 256-bit (ymm) forms, with the
; second source taken from both a register and memory.
;
; Within every 128-bit half of the result:
;     dst[low  qword] = src1[low  qword] - src2[low  qword]
;     dst[high qword] = src1[high qword] + src2[high qword]
;
; Inputs: ymm0 = 1.0 in all four doubles, ymm1 = 2.0 in all four doubles.
;     (src1 = 1.0, src2 = 2.0) -> { -1.0, 3.0 } = 0xBFF0..., 0x4008...
;     (src1 = 2.0, src2 = 1.0) -> {  1.0, 3.0 } = 0x3FF0..., 0x4008...
; The xmm forms leave the upper 128 bits of the destination zero.
; Expected register contents are the RegData entries in the CONFIG block.
;-----------------------------------------------------------------------

%define VEC_BYTES 32                                ; size of one 256-bit operand

        lea         rdx, [rel .operands]            ; rdx -> { 1.0 x4, 2.0 x4 }
        vmovapd     ymm0, [rdx]                     ; ymm0 = 1.0 x4
        vmovapd     ymm1, [rdx + VEC_BYTES]         ; ymm1 = 2.0 x4

        ; 128-bit forms: 1.0 (+/-) 2.0, then 2.0 (+/-) 1.0; memory then register source
        vaddsubpd   xmm2, xmm0, [rdx + VEC_BYTES]
        vaddsubpd   xmm3, xmm0, xmm1
        vaddsubpd   xmm4, xmm1, [rdx]
        vaddsubpd   xmm5, xmm1, xmm0

        ; 256-bit forms: same operand pairings as above
        vaddsubpd   ymm6, ymm0, [rdx + VEC_BYTES]
        vaddsubpd   ymm7, ymm0, ymm1
        vaddsubpd   ymm8, ymm1, [rdx]
        vaddsubpd   ymm9, ymm1, ymm0

        ; Destination is also the first source
        vmovapd     ymm10, [rdx]                    ; ymm10 = 1.0 x4
        vaddsubpd   ymm10, ymm10, ymm1

        ; Destination is also the second source
        vmovapd     ymm11, [rdx + VEC_BYTES]        ; ymm11 = 2.0 x4
        vaddsubpd   ymm11, ymm0, ymm11

        hlt                                         ; end of test code

        align       VEC_BYTES                       ; vmovapd ymm loads need 32-byte alignment
.operands:
        times 4 dq  0x3FF0000000000000              ; 1.0 x4, loaded into ymm0
        times 4 dq  0x4000000000000000              ; 2.0 x4, loaded into ymm1