[X86] Fix VPPERM load folding latency

Noticed while investigating BITREVERSE cost numbers with the D103695 script - the VPPERM folded-load forms were using the WriteVarShuffleX defaults and were missing an override like the one the VPPERM reg-reg variants already have.
This commit is contained in:
Simon Pilgrim 2022-09-09 13:57:39 +01:00
parent 72730c3f0e
commit 05f56f10ed
2 changed files with 8 additions and 2 deletions

View File

@ -1186,6 +1186,12 @@ def PdWriteVPPERM : SchedWriteRes<[PdFPU01, PdFPMAL]> {
}
def : InstRW<[PdWriteVPPERM], (instrs VPPERMrrr, VPPERMrrr_REV)>;
// Scheduling write for the VPPERM forms that fold a load. It uses the same
// PdFPU01 and PdFPMAL resources as the register-only PdWriteVPPERM, plus PdLoad.
def PdWriteVPPERMLd : SchedWriteRes<[PdFPU01, PdFPMAL, PdLoad]> {
let Latency = 7;
// One entry per resource in the list above, in the same order:
// PdFPU01 = 1, PdFPMAL = 3, PdLoad = 3.
let ResourceCycles = [1, 3, 3];
}
// Explicitly bind the two memory-operand encodings to this write so they do not
// use a default scheduling class.
def : InstRW<[PdWriteVPPERMLd], (instrs VPPERMrrm, VPPERMrmr)>;
// Blend scheduling classes. WriteBlend is mapped onto PdFPU01 and PdFPMAL.
// NOTE(review): the trailing `2` is presumably the latency argument of
// PdWriteResXMMPair - confirm against the multiclass definition.
defm : PdWriteResXMMPair<WriteBlend, [PdFPU01, PdFPMAL], 2>;
// NOTE(review): WriteBlendY / WriteBlendZ are presumably the wider-vector blend
// classes, declared unsupported for this model - confirm.
defm : X86WriteResPairUnsupported<WriteBlendY>;
defm : X86WriteResPairUnsupported<WriteBlendZ>;

View File

@ -322,8 +322,8 @@ vpshlw %xmm0, (%rax), %xmm3
# CHECK-NEXT: 1 4 1.00 vpmadcswd %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 1 9 1.50 * vpmadcswd %xmm0, (%rax), %xmm1, %xmm3
# CHECK-NEXT: 1 2 1.50 vpperm %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 1 8 1.50 * vpperm (%rax), %xmm0, %xmm1, %xmm3
# CHECK-NEXT: 1 8 1.50 * vpperm %xmm0, (%rax), %xmm1, %xmm3
# CHECK-NEXT: 1 7 1.50 * vpperm (%rax), %xmm0, %xmm1, %xmm3
# CHECK-NEXT: 1 7 1.50 * vpperm %xmm0, (%rax), %xmm1, %xmm3
# CHECK-NEXT: 1 3 1.00 vprotb %xmm0, %xmm1, %xmm3
# CHECK-NEXT: 1 8 1.50 * vprotb (%rax), %xmm0, %xmm3
# CHECK-NEXT: 1 8 1.50 * vprotb %xmm0, (%rax), %xmm3