[X86] Add vector element insertion/extraction scheduler classes

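Split off the pinsr/pextr and extractps instructions from the shuffle/blend scheduler classes into dedicated WriteVecInsert, WriteVecExtract and WriteVecExtractSt classes.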

(Mostly) fixes PR36887.

Note: It might be worth adding a WriteFInsertLd class as well in the future.

Differential Revision: https://reviews.llvm.org/D45929

llvm-svn: 330714
This commit is contained in:
Simon Pilgrim 2018-04-24 13:21:41 +00:00
parent c8e4fc2a95
commit 3603cba798
17 changed files with 200 additions and 260 deletions

View File

@ -1085,14 +1085,14 @@ def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
(ins VR128X:$src1, u8imm:$src2),
"vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
EVEX, VEX_WIG, Sched<[WriteFBlend]>;
EVEX, VEX_WIG, Sched<[WriteVecExtract]>;
def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
(ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
"vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
addr:$dst)]>,
EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteFBlendLd, WriteRMW]>;
EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;
//===---------------------------------------------------------------------===//
// AVX-512 BROADCAST
@ -9878,7 +9878,7 @@ multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), imm:$src2))),
addr:$dst)]>,
EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteShuffleLd, WriteRMW]>;
EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
}
multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
@ -9888,7 +9888,7 @@ multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32orGR64:$dst,
(X86pextrb (_.VT _.RC:$src1), imm:$src2))]>,
EVEX, TAPD, Sched<[WriteShuffle]>;
EVEX, TAPD, Sched<[WriteVecExtract]>;
defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
}
@ -9901,14 +9901,14 @@ multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32orGR64:$dst,
(X86pextrw (_.VT _.RC:$src1), imm:$src2))]>,
EVEX, PD, Sched<[WriteShuffle]>;
EVEX, PD, Sched<[WriteVecExtract]>;
let hasSideEffects = 0 in
def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
(ins _.RC:$src1, u8imm:$src2),
OpcodeStr#".s\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
EVEX, TAPD, FoldGenData<NAME#rr>,
Sched<[WriteShuffle]>;
Sched<[WriteVecExtract]>;
defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
}
@ -9922,7 +9922,7 @@ multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GRC:$dst,
(extractelt (_.VT _.RC:$src1), imm:$src2))]>,
EVEX, TAPD, Sched<[WriteShuffle]>;
EVEX, TAPD, Sched<[WriteVecExtract]>;
def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
(ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
@ -9930,7 +9930,7 @@ multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
[(store (extractelt (_.VT _.RC:$src1),
imm:$src2),addr:$dst)]>,
EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
Sched<[WriteShuffleLd, WriteRMW]>;
Sched<[WriteVecExtractSt]>;
}
}
@ -9946,7 +9946,7 @@ multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set _.RC:$dst,
(_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), imm:$src3)))]>,
EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteShuffleLd, ReadAfterLd]>;
EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsertLd, ReadAfterLd]>;
}
multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
@ -9957,7 +9957,7 @@ multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set _.RC:$dst,
(OpNode _.RC:$src1, GR32orGR64:$src2, imm:$src3))]>, EVEX_4V,
Sched<[WriteShuffle]>;
Sched<[WriteVecInsert]>;
defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag>;
}
@ -9971,7 +9971,7 @@ multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set _.RC:$dst,
(_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
EVEX_4V, TAPD, Sched<[WriteShuffle]>;
EVEX_4V, TAPD, Sched<[WriteVecInsert]>;
defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
_.ScalarLdFrag>, TAPD;

View File

@ -528,7 +528,7 @@ def MMX_PEXTRWrr: MMXIi8<0xC5, MRMSrcReg,
"pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32orGR64:$dst, (int_x86_mmx_pextr_w VR64:$src1,
imm:$src2))]>,
Sched<[WriteShuffle]>;
Sched<[WriteVecExtract]>;
let Constraints = "$src1 = $dst" in {
let Predicates = [HasSSE1] in {
def MMX_PINSRWrr : MMXIi8<0xC4, MRMSrcReg,
@ -537,7 +537,7 @@ let Predicates = [HasSSE1] in {
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
GR32orGR64:$src2, imm:$src3))]>,
Sched<[WriteShuffle]>;
Sched<[WriteVecInsert]>;
def MMX_PINSRWrm : MMXIi8<0xC4, MRMSrcMem,
(outs VR64:$dst),
@ -546,7 +546,7 @@ let Predicates = [HasSSE1] in {
[(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
(i32 (anyext (loadi16 addr:$src2))),
imm:$src3))]>,
Sched<[WriteShuffleLd, ReadAfterLd]>;
Sched<[WriteVecInsertLd, ReadAfterLd]>;
}
}

View File

@ -3782,7 +3782,7 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> {
"vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(X86pinsrw VR128:$src1, GR32orGR64:$src2, imm:$src3))]>,
Sched<[WriteShuffle]>;
Sched<[WriteVecInsert]>;
def rm : Ii8<0xC4, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1,
i16mem:$src2, u8imm:$src3),
@ -3792,7 +3792,7 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> {
[(set VR128:$dst,
(X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
imm:$src3))]>,
Sched<[WriteShuffleLd, ReadAfterLd]>;
Sched<[WriteVecInsertLd, ReadAfterLd]>;
}
// Extract
@ -3802,13 +3802,13 @@ def VPEXTRWrr : Ii8<0xC5, MRMSrcReg,
"vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1),
imm:$src2))]>,
PD, VEX, Sched<[WriteShuffle]>;
PD, VEX, Sched<[WriteVecExtract]>;
def PEXTRWrr : PDIi8<0xC5, MRMSrcReg,
(outs GR32orGR64:$dst), (ins VR128:$src1, u8imm:$src2),
"pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1),
imm:$src2))]>,
Sched<[WriteShuffle]>;
Sched<[WriteVecExtract]>;
// Insert
let Predicates = [HasAVX, NoBWI] in
@ -5085,15 +5085,14 @@ multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set GR32orGR64:$dst, (X86pextrb (v16i8 VR128:$src1),
imm:$src2))]>,
Sched<[WriteShuffle]>;
let hasSideEffects = 0, mayStore = 1,
SchedRW = [WriteShuffleLd, WriteRMW] in
Sched<[WriteVecExtract]>;
let hasSideEffects = 0, mayStore = 1 in
def mr : SS4AIi8<opc, MRMDestMem, (outs),
(ins i8mem:$dst, VR128:$src1, u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))),
addr:$dst)]>;
addr:$dst)]>, Sched<[WriteVecExtractSt]>;
}
let Predicates = [HasAVX, NoBWI] in
@ -5109,16 +5108,15 @@ multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
(ins VR128:$src1, u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
Sched<[WriteShuffle]>, FoldGenData<NAME#ri>;
Sched<[WriteVecExtract]>, FoldGenData<NAME#ri>;
let hasSideEffects = 0, mayStore = 1,
SchedRW = [WriteShuffleLd, WriteRMW] in
let hasSideEffects = 0, mayStore = 1 in
def mr : SS4AIi8<opc, MRMDestMem, (outs),
(ins i16mem:$dst, VR128:$src1, u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(store (i16 (trunc (X86pextrw (v8i16 VR128:$src1), imm:$src2))),
addr:$dst)]>;
addr:$dst)]>, Sched<[WriteVecExtractSt]>;
}
let Predicates = [HasAVX, NoBWI] in
@ -5135,14 +5133,13 @@ multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> {
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set GR32:$dst,
(extractelt (v4i32 VR128:$src1), imm:$src2))]>,
Sched<[WriteShuffle]>;
let SchedRW = [WriteShuffleLd, WriteRMW] in
Sched<[WriteVecExtract]>;
def mr : SS4AIi8<opc, MRMDestMem, (outs),
(ins i32mem:$dst, VR128:$src1, u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(store (extractelt (v4i32 VR128:$src1), imm:$src2),
addr:$dst)]>;
addr:$dst)]>, Sched<[WriteVecExtractSt]>;
}
let Predicates = [HasAVX, NoDQI] in
@ -5158,14 +5155,13 @@ multiclass SS41I_extract64<bits<8> opc, string OpcodeStr> {
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set GR64:$dst,
(extractelt (v2i64 VR128:$src1), imm:$src2))]>,
Sched<[WriteShuffle]>;
let SchedRW = [WriteShuffleLd, WriteRMW] in
Sched<[WriteVecExtract]>;
def mr : SS4AIi8<opc, MRMDestMem, (outs),
(ins i64mem:$dst, VR128:$src1, u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(store (extractelt (v2i64 VR128:$src1), imm:$src2),
addr:$dst)]>;
addr:$dst)]>, Sched<[WriteVecExtractSt]>;
}
let Predicates = [HasAVX, NoDQI] in
@ -5182,14 +5178,13 @@ multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set GR32orGR64:$dst,
(extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))]>,
Sched<[WriteFBlend]>;
let SchedRW = [WriteFBlendLd, WriteRMW] in
Sched<[WriteVecExtract]>;
def mr : SS4AIi8<opc, MRMDestMem, (outs),
(ins f32mem:$dst, VR128:$src1, u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2),
addr:$dst)]>;
addr:$dst)]>, Sched<[WriteVecExtractSt]>;
}
let ExeDomain = SSEPackedSingle in {
@ -5223,7 +5218,7 @@ multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
(X86pinsrb VR128:$src1, GR32orGR64:$src2, imm:$src3))]>,
Sched<[WriteShuffle]>;
Sched<[WriteVecInsert]>;
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i8mem:$src2, u8imm:$src3),
!if(Is2Addr,
@ -5232,7 +5227,7 @@ multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
(X86pinsrb VR128:$src1, (extloadi8 addr:$src2),
imm:$src3))]>, Sched<[WriteShuffleLd, ReadAfterLd]>;
imm:$src3))]>, Sched<[WriteVecInsertLd, ReadAfterLd]>;
}
let Predicates = [HasAVX, NoBWI] in
@ -5249,7 +5244,7 @@ multiclass SS41I_insert32<bits<8> opc, string asm, bit Is2Addr = 1> {
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
(v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>,
Sched<[WriteShuffle]>;
Sched<[WriteVecInsert]>;
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i32mem:$src2, u8imm:$src3),
!if(Is2Addr,
@ -5258,7 +5253,7 @@ multiclass SS41I_insert32<bits<8> opc, string asm, bit Is2Addr = 1> {
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
(v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2),
imm:$src3)))]>, Sched<[WriteShuffleLd, ReadAfterLd]>;
imm:$src3)))]>, Sched<[WriteVecInsertLd, ReadAfterLd]>;
}
let Predicates = [HasAVX, NoDQI] in
@ -5275,7 +5270,7 @@ multiclass SS41I_insert64<bits<8> opc, string asm, bit Is2Addr = 1> {
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
(v2i64 (insertelt VR128:$src1, GR64:$src2, imm:$src3)))]>,
Sched<[WriteShuffle]>;
Sched<[WriteVecInsert]>;
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i64mem:$src2, u8imm:$src3),
!if(Is2Addr,
@ -5284,7 +5279,7 @@ multiclass SS41I_insert64<bits<8> opc, string asm, bit Is2Addr = 1> {
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
(v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2),
imm:$src3)))]>, Sched<[WriteShuffleLd, ReadAfterLd]>;
imm:$src3)))]>, Sched<[WriteVecInsertLd, ReadAfterLd]>;
}
let Predicates = [HasAVX, NoDQI] in

View File

@ -12,7 +12,7 @@
//
//===----------------------------------------------------------------------===//
def BroadwellModel : SchedMachineModel {
// All x86 instructions are modeled as a single micro-op, and HW can decode 4
// All x86 instructions are modeled as a single micro-op, and BW can decode 4
// instructions per cycle.
let IssueWidth = 4;
let MicroOpBufferSize = 192; // Based on the reorder buffer.
@ -190,6 +190,26 @@ defm : BWWriteResPair<WriteVarBlend, [BWPort5], 2, [2], 2, 5>; // Vector variab
defm : BWWriteResPair<WriteMPSAD, [BWPort0, BWPort5], 7, [1, 2], 3, 5>; // Vector MPSAD.
defm : BWWriteResPair<WritePSADBW, [BWPort0], 5>; // Vector PSADBW.
// Vector insert/extract operations.
// Per-class resources for the vector element insert/extract scheduler classes.
// NOTE(review): the entries without an explicit ResourceCycles presumably
// default to one cycle per listed resource - confirm against WriteRes.
// GPR -> vector element insert: latency 2, 2 uops, BWPort5 held for 2 cycles.
def : WriteRes<WriteVecInsert, [BWPort5]> {
let Latency = 2;
let NumMicroOps = 2;
let ResourceCycles = [2];
}
// Insert from memory: latency 6, 2 uops across BWPort5 and BWPort23.
def : WriteRes<WriteVecInsertLd, [BWPort5,BWPort23]> {
let Latency = 6;
let NumMicroOps = 2;
}
// Vector element -> GPR extract: latency 2, 2 uops across BWPort0 and BWPort5.
def : WriteRes<WriteVecExtract, [BWPort0,BWPort5]> {
let Latency = 2;
let NumMicroOps = 2;
}
// Extract and store: latency 2, 3 uops across BWPort4, BWPort5 and BWPort237.
def : WriteRes<WriteVecExtractSt, [BWPort4,BWPort5,BWPort237]> {
let Latency = 2;
let NumMicroOps = 3;
}
// Conversion between integer and float.
defm : BWWriteResPair<WriteCvtF2I, [BWPort1], 3>; // Float -> Integer.
defm : BWWriteResPair<WriteCvtI2F, [BWPort1], 4>; // Integer -> Float.
@ -462,17 +482,6 @@ def: InstRW<[BWWriteResGroup10], (instregex "FBSTPm",
"(V?)MOVUPD(Y?)mr",
"(V?)MOVUPS(Y?)mr")>;
def BWWriteResGroup11 : SchedWriteRes<[BWPort5]> {
let Latency = 2;
let NumMicroOps = 2;
let ResourceCycles = [2];
}
def: InstRW<[BWWriteResGroup11], (instregex "MMX_PINSRWrr",
"(V?)PINSRBrr",
"(V?)PINSRDrr",
"(V?)PINSRQrr",
"(V?)PINSRWrr")>;
def BWWriteResGroup12 : SchedWriteRes<[BWPort01]> {
let Latency = 2;
let NumMicroOps = 2;
@ -505,15 +514,9 @@ def BWWriteResGroup15 : SchedWriteRes<[BWPort0,BWPort5]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
def: InstRW<[BWWriteResGroup15], (instregex "MMX_PEXTRWrr",
"VCVTPH2PS(Y?)rr",
def: InstRW<[BWWriteResGroup15], (instregex "VCVTPH2PS(Y?)rr",
"(V?)CVTPS2PDrr",
"(V?)CVTSS2SDrr",
"(V?)EXTRACTPSrr",
"(V?)PEXTRBrr",
"(V?)PEXTRDrr",
"(V?)PEXTRQrr",
"(V?)PEXTRWrr",
"(V?)PSLLDrr",
"(V?)PSLLQrr",
"(V?)PSLLWrr",
@ -573,17 +576,6 @@ def: InstRW<[BWWriteResGroup20], (instregex "ADC8i8",
"SBB8ri",
"SET(A|BE)r")>;
def BWWriteResGroup21 : SchedWriteRes<[BWPort4,BWPort5,BWPort237]> {
let Latency = 2;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
def: InstRW<[BWWriteResGroup21], (instregex "(V?)EXTRACTPSmr",
"(V?)PEXTRBmr",
"(V?)PEXTRDmr",
"(V?)PEXTRQmr",
"(V?)PEXTRWmr")>;
def BWWriteResGroup22 : SchedWriteRes<[BWPort4,BWPort6,BWPort237]> {
let Latency = 2;
let NumMicroOps = 3;

View File

@ -189,6 +189,26 @@ defm : HWWriteResPair<WriteVarVecShift, [HWPort0, HWPort5], 2, [2, 1]>;
defm : HWWriteResPair<WriteMPSAD, [HWPort0, HWPort5], 7, [1, 2], 3, 6>;
defm : HWWriteResPair<WritePSADBW, [HWPort0], 5>;
// Vector insert/extract operations.
// Per-class resources for the vector element insert/extract scheduler classes.
// NOTE(review): the entries without an explicit ResourceCycles presumably
// default to one cycle per listed resource - confirm against WriteRes.
// GPR -> vector element insert: latency 2, 2 uops, HWPort5 held for 2 cycles.
def : WriteRes<WriteVecInsert, [HWPort5]> {
let Latency = 2;
let NumMicroOps = 2;
let ResourceCycles = [2];
}
// Insert from memory: latency 6, 2 uops across HWPort5 and HWPort23.
def : WriteRes<WriteVecInsertLd, [HWPort5,HWPort23]> {
let Latency = 6;
let NumMicroOps = 2;
}
// Vector element -> GPR extract: latency 2, 2 uops across HWPort0 and HWPort5.
def : WriteRes<WriteVecExtract, [HWPort0,HWPort5]> {
let Latency = 2;
let NumMicroOps = 2;
}
// Extract and store: latency 2, 3 uops across HWPort4, HWPort5 and HWPort237.
def : WriteRes<WriteVecExtractSt, [HWPort4,HWPort5,HWPort237]> {
let Latency = 2;
let NumMicroOps = 3;
}
// String instructions.
// Packed Compare Implicit Length Strings, Return Mask
@ -1092,17 +1112,6 @@ def HWWriteResGroup19 : SchedWriteRes<[HWPort237,HWPort0156]> {
}
def: InstRW<[HWWriteResGroup19], (instregex "SFENCE")>;
def HWWriteResGroup20 : SchedWriteRes<[HWPort4,HWPort5,HWPort237]> {
let Latency = 2;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
def: InstRW<[HWWriteResGroup20], (instregex "(V?)EXTRACTPSmr",
"(V?)PEXTRBmr",
"(V?)PEXTRDmr",
"(V?)PEXTRQmr",
"(V?)PEXTRWmr")>;
def HWWriteResGroup21 : SchedWriteRes<[HWPort4,HWPort6,HWPort237]> {
let Latency = 2;
let NumMicroOps = 3;
@ -1160,17 +1169,6 @@ def HWWriteResGroup26 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort0156]> {
def: InstRW<[HWWriteResGroup26], (instregex "POP(16|32|64)rmm",
"PUSH(16|32|64)rmm")>;
def HWWriteResGroup27 : SchedWriteRes<[HWPort5]> {
let Latency = 2;
let NumMicroOps = 2;
let ResourceCycles = [2];
}
def: InstRW<[HWWriteResGroup27], (instregex "MMX_PINSRWrr",
"(V?)PINSRBrr",
"(V?)PINSRDrr",
"(V?)PINSRQrr",
"(V?)PINSRWrr")>;
def HWWriteResGroup28 : SchedWriteRes<[HWPort01]> {
let Latency = 2;
let NumMicroOps = 2;
@ -1203,16 +1201,10 @@ def HWWriteResGroup31 : SchedWriteRes<[HWPort0,HWPort5]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
def: InstRW<[HWWriteResGroup31], (instregex "MMX_PEXTRWrr",
"VCVTPH2PSYrr",
def: InstRW<[HWWriteResGroup31], (instregex "VCVTPH2PSYrr",
"VCVTPH2PSrr",
"(V?)CVTPS2PDrr",
"(V?)CVTSS2SDrr",
"(V?)EXTRACTPSrr",
"(V?)PEXTRBrr",
"(V?)PEXTRDrr",
"(V?)PEXTRQrr",
"(V?)PEXTRWrr",
"(V?)PSLLDrr",
"(V?)PSLLQrr",
"(V?)PSLLWrr",

View File

@ -173,6 +173,25 @@ defm : SBWriteResPair<WriteVarBlend, [SBPort15], 2, [2], 2, 6>;
defm : SBWriteResPair<WriteMPSAD, [SBPort0, SBPort15], 7, [1,2], 3, 6>;
defm : SBWriteResPair<WritePSADBW, [SBPort0], 5>;
// Vector insert/extract operations.
// Per-class resources for the vector element insert/extract scheduler classes.
// NOTE(review): none of these set ResourceCycles; presumably each listed
// resource is used for one cycle - confirm against WriteRes.
// GPR -> vector element insert: latency 2, 2 uops across SBPort5 and SBPort15.
def : WriteRes<WriteVecInsert, [SBPort5,SBPort15]> {
let Latency = 2;
let NumMicroOps = 2;
}
// Insert from memory: latency 7, 2 uops across SBPort23 and SBPort15.
def : WriteRes<WriteVecInsertLd, [SBPort23,SBPort15]> {
let Latency = 7;
let NumMicroOps = 2;
}
// Vector element -> GPR extract: latency 3, 2 uops across SBPort0 and SBPort15.
def : WriteRes<WriteVecExtract, [SBPort0,SBPort15]> {
let Latency = 3;
let NumMicroOps = 2;
}
// Extract and store: latency 5, 3 uops across SBPort4, SBPort23 and SBPort15.
def : WriteRes<WriteVecExtractSt, [SBPort4,SBPort23,SBPort15]> {
let Latency = 5;
let NumMicroOps = 3;
}
////////////////////////////////////////////////////////////////////////////////
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////
@ -535,16 +554,6 @@ def SBWriteResGroup16_1 : SchedWriteRes<[SBPort1]> {
}
def: InstRW<[SBWriteResGroup16_1], (instrs BSWAP32r)>;
def SBWriteResGroup17 : SchedWriteRes<[SBPort5,SBPort15]> {
let Latency = 2;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
def: InstRW<[SBWriteResGroup17], (instregex "(V?)PINSRBrr",
"(V?)PINSRDrr",
"(V?)PINSRQrr",
"(V?)PINSRWrr")>;
def SBWriteResGroup18 : SchedWriteRes<[SBPort5,SBPort015]> {
let Latency = 2;
let NumMicroOps = 2;
@ -590,16 +599,6 @@ def SBWriteResGroup22 : SchedWriteRes<[SBPort0,SBPort5]> {
}
def: InstRW<[SBWriteResGroup22], (instregex "(V?)EXTRACTPSrr")>;
def SBWriteResGroup23 : SchedWriteRes<[SBPort0,SBPort15]> {
let Latency = 3;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
def: InstRW<[SBWriteResGroup23], (instregex "(V?)PEXTRBrr",
"(V?)PEXTRDrr",
"(V?)PEXTRQrr",
"(V?)PEXTRWrr")>;
def SBWriteResGroup23_2 : SchedWriteRes<[SBPort05]> {
let Latency = 3;
let NumMicroOps = 3;
@ -793,15 +792,6 @@ def SBWriteResGroup37 : SchedWriteRes<[SBPort4,SBPort01,SBPort23]> {
def: InstRW<[SBWriteResGroup37], (instregex "VMASKMOVPD(Y?)mr",
"VMASKMOVPS(Y?)mr")>;
def SBWriteResGroup39 : SchedWriteRes<[SBPort4,SBPort23,SBPort15]> {
let Latency = 5;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
def: InstRW<[SBWriteResGroup39], (instregex "(V?)PEXTRBmr",
"VPEXTRDmr",
"VPEXTRWmr")>;
def SBWriteResGroup40 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> {
let Latency = 5;
let NumMicroOps = 3;
@ -1009,10 +999,6 @@ def: InstRW<[SBWriteResGroup59], (instregex "MMX_PADDQirm",
"(V?)PCMPGTBrm",
"(V?)PCMPGTDrm",
"(V?)PCMPGTWrm",
"(V?)PINSRBrm",
"(V?)PINSRDrm",
"(V?)PINSRQrm",
"(V?)PINSRWrm",
"(V?)PMAXSBrm",
"(V?)PMAXSDrm",
"(V?)PMAXSWrm",

View File

@ -187,6 +187,26 @@ defm : SKLWriteResPair<WriteVarBlend, [SKLPort015], 2, [2], 2, 6>; // Vector va
defm : SKLWriteResPair<WriteMPSAD, [SKLPort5], 4, [2], 2, 6>; // Vector MPSAD.
defm : SKLWriteResPair<WritePSADBW, [SKLPort5], 3>; // Vector PSADBW.
// Vector insert/extract operations.
// Per-class resources for the vector element insert/extract scheduler classes.
// NOTE(review): the entries without an explicit ResourceCycles presumably
// default to one cycle per listed resource - confirm against WriteRes.
// GPR -> vector element insert: latency 2, 2 uops, SKLPort5 held for 2 cycles.
def : WriteRes<WriteVecInsert, [SKLPort5]> {
let Latency = 2;
let NumMicroOps = 2;
let ResourceCycles = [2];
}
// Insert from memory: latency 6, 2 uops across SKLPort5 and SKLPort23.
def : WriteRes<WriteVecInsertLd, [SKLPort5,SKLPort23]> {
let Latency = 6;
let NumMicroOps = 2;
}
// Vector element -> GPR extract: latency 3, 2 uops across SKLPort0 and SKLPort5.
def : WriteRes<WriteVecExtract, [SKLPort0,SKLPort5]> {
let Latency = 3;
let NumMicroOps = 2;
}
// Extract and store: latency 2, 3 uops across SKLPort4, SKLPort5 and SKLPort237.
def : WriteRes<WriteVecExtractSt, [SKLPort4,SKLPort5,SKLPort237]> {
let Latency = 2;
let NumMicroOps = 3;
}
// Conversion between integer and float.
defm : SKLWriteResPair<WriteCvtF2I, [SKLPort1], 3>; // Float -> Integer.
defm : SKLWriteResPair<WriteCvtI2F, [SKLPort1], 4>; // Integer -> Float.
@ -571,12 +591,7 @@ def SKLWriteResGroup13 : SchedWriteRes<[SKLPort5]> {
let NumMicroOps = 2;
let ResourceCycles = [2];
}
def: InstRW<[SKLWriteResGroup13], (instregex "MMX_MOVQ2DQrr",
"MMX_PINSRWrr",
"(V?)PINSRBrr",
"(V?)PINSRDrr",
"(V?)PINSRQrr",
"(V?)PINSRWrr")>;
def: InstRW<[SKLWriteResGroup13], (instregex "MMX_MOVQ2DQrr")>;
def SKLWriteResGroup14 : SchedWriteRes<[SKLPort05]> {
let Latency = 2;
@ -671,17 +686,6 @@ def: InstRW<[SKLWriteResGroup23], (instregex "ADC8i8",
"SBB8i8",
"SBB8ri")>;
def SKLWriteResGroup24 : SchedWriteRes<[SKLPort4,SKLPort5,SKLPort237]> {
let Latency = 2;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
def: InstRW<[SKLWriteResGroup24], (instregex "(V?)EXTRACTPSmr",
"(V?)PEXTRBmr",
"(V?)PEXTRDmr",
"(V?)PEXTRQmr",
"(V?)PEXTRWmr")>;
def SKLWriteResGroup25 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort237]> {
let Latency = 2;
let NumMicroOps = 3;
@ -761,13 +765,7 @@ def SKLWriteResGroup31 : SchedWriteRes<[SKLPort0,SKLPort5]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
def: InstRW<[SKLWriteResGroup31], (instregex "MMX_PEXTRWrr",
"(V?)EXTRACTPSrr",
"(V?)PEXTRBrr",
"(V?)PEXTRDrr",
"(V?)PEXTRQrr",
"(V?)PEXTRWrr",
"(V?)PTEST(Y?)rr")>;
def: InstRW<[SKLWriteResGroup31], (instregex "(V?)PTEST(Y?)rr")>;
def SKLWriteResGroup32 : SchedWriteRes<[SKLPort0,SKLPort0156]> {
let Latency = 3;

View File

@ -187,6 +187,26 @@ defm : SKXWriteResPair<WriteVarBlend, [SKXPort015], 2, [2], 2, 6>; // Vector var
defm : SKXWriteResPair<WriteMPSAD, [SKXPort5], 4, [2], 2, 6>; // Vector MPSAD.
defm : SKXWriteResPair<WritePSADBW, [SKXPort5], 3, [1,1], 1, 6>; // Vector PSADBW.
// Vector insert/extract operations.
// Per-class resources for the vector element insert/extract scheduler classes.
// NOTE(review): the entries without an explicit ResourceCycles presumably
// default to one cycle per listed resource - confirm against WriteRes.
// GPR -> vector element insert: latency 2, 2 uops, SKXPort5 held for 2 cycles.
def : WriteRes<WriteVecInsert, [SKXPort5]> {
let Latency = 2;
let NumMicroOps = 2;
let ResourceCycles = [2];
}
// Insert from memory: latency 6, 2 uops across SKXPort5 and SKXPort23.
def : WriteRes<WriteVecInsertLd, [SKXPort5,SKXPort23]> {
let Latency = 6;
let NumMicroOps = 2;
}
// Vector element -> GPR extract: latency 3, 2 uops across SKXPort0 and SKXPort5.
def : WriteRes<WriteVecExtract, [SKXPort0,SKXPort5]> {
let Latency = 3;
let NumMicroOps = 2;
}
// Extract and store: latency 2, 3 uops across SKXPort4, SKXPort5 and SKXPort237.
def : WriteRes<WriteVecExtractSt, [SKXPort4,SKXPort5,SKXPort237]> {
let Latency = 2;
let NumMicroOps = 3;
}
// Conversion between integer and float.
defm : SKXWriteResPair<WriteCvtF2I, [SKXPort1], 3>; // Float -> Integer.
defm : SKXWriteResPair<WriteCvtI2F, [SKXPort1], 4>; // Integer -> Float.
@ -1035,20 +1055,7 @@ def SKXWriteResGroup13 : SchedWriteRes<[SKXPort5]> {
let NumMicroOps = 2;
let ResourceCycles = [2];
}
def: InstRW<[SKXWriteResGroup13], (instregex "MMX_MOVQ2DQrr",
"MMX_PINSRWrr",
"PINSRBrr",
"PINSRDrr",
"PINSRQrr",
"PINSRWrr",
"VPINSRBZrr",
"VPINSRBrr",
"VPINSRDZrr",
"VPINSRDrr",
"VPINSRQZrr",
"VPINSRQrr",
"VPINSRWZrr",
"VPINSRWrr")>;
def: InstRW<[SKXWriteResGroup13], (instregex "MMX_MOVQ2DQrr")>;
def SKXWriteResGroup14 : SchedWriteRes<[SKXPort05]> {
let Latency = 2;
@ -1163,27 +1170,6 @@ def: InstRW<[SKXWriteResGroup23], (instregex "ADC8i8",
"SBB8i8",
"SBB8ri")>;
def SKXWriteResGroup24 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort237]> {
let Latency = 2;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
def: InstRW<[SKXWriteResGroup24], (instregex "EXTRACTPSmr",
"PEXTRBmr",
"PEXTRDmr",
"PEXTRQmr",
"PEXTRWmr",
"VEXTRACTPSZmr(b?)",
"VEXTRACTPSmr",
"VPEXTRBZmr(b?)",
"VPEXTRBmr",
"VPEXTRDZmr(b?)",
"VPEXTRDmr",
"VPEXTRQZmr(b?)",
"VPEXTRQmr",
"VPEXTRWZmr(b?)",
"VPEXTRWmr")>;
def SKXWriteResGroup25 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort237]> {
let Latency = 2;
let NumMicroOps = 3;
@ -1455,25 +1441,7 @@ def SKXWriteResGroup33 : SchedWriteRes<[SKXPort0,SKXPort5]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
def: InstRW<[SKXWriteResGroup33], (instregex "EXTRACTPSrr",
"MMX_PEXTRWrr",
"PEXTRBrr",
"PEXTRDrr",
"PEXTRQrr",
"PEXTRWrr",
"PTESTrr",
"VEXTRACTPSZrr",
"VEXTRACTPSrr",
"VPEXTRBZrr",
"VPEXTRBrr",
"VPEXTRDZrr",
"VPEXTRDrr",
"VPEXTRQZrr",
"VPEXTRQrr",
"VPEXTRWZrr",
"VPEXTRWrr",
"VPTESTYrr",
"VPTESTrr")>;
def: InstRW<[SKXWriteResGroup33], (instregex "(V?)PTEST(Y?)rr")>;
def SKXWriteResGroup34 : SchedWriteRes<[SKXPort0,SKXPort0156]> {
let Latency = 3;

View File

@ -117,6 +117,11 @@ defm WriteVarBlend : X86SchedWritePair; // Vector variable blends.
defm WritePSADBW : X86SchedWritePair; // Vector PSADBW.
defm WriteMPSAD : X86SchedWritePair; // Vector MPSAD.
// Vector insert/extract operations.
// WriteVecInsert is declared as a pair, whereas the two extract classes are
// standalone SchedWrite defs (register destination vs. store destination).
// NOTE(review): X86SchedWritePair presumably also creates the folded-load
// companion (WriteVecInsertLd) - confirm against its definition.
defm WriteVecInsert : X86SchedWritePair; // Insert gpr to vector element.
def WriteVecExtract : SchedWrite; // Extract vector element to gpr.
def WriteVecExtractSt : SchedWrite; // Extract vector element and store.
// MOVMSK operations.
def WriteFMOVMSK : SchedWrite;
def WriteVecMOVMSK : SchedWrite;

View File

@ -251,6 +251,14 @@ defm : AtomWriteResPair<WriteShuffle256, [AtomPort0], [AtomPort0]>; // NOTE:
defm : AtomWriteResPair<WriteVarShuffle256, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
defm : AtomWriteResPair<WriteVarVecShift, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
////////////////////////////////////////////////////////////////////////////////
// Vector insert/extract operations.
////////////////////////////////////////////////////////////////////////////////
// All three insert/extract classes are mapped onto AtomPort0.
// NOTE(review): the trailing "1, 1" arguments are presumably the register and
// memory latencies - confirm against the AtomWriteResPair definition.
defm : AtomWriteResPair<WriteVecInsert, [AtomPort0], [AtomPort0], 1, 1>;
// The extract classes set no Latency/NumMicroOps here, so they take whatever
// WriteRes provides by default.
def : WriteRes<WriteVecExtract, [AtomPort0]>;
def : WriteRes<WriteVecExtractSt, [AtomPort0]>;
////////////////////////////////////////////////////////////////////////////////
// SSE42 String instructions.
////////////////////////////////////////////////////////////////////////////////

View File

@ -385,23 +385,12 @@ defm : JWriteResFpuPair<WriteVarShuffle256, [JFPU01, JVALU], 1>; // NOTE: Doesn'
defm : JWriteResFpuPair<WriteVarVecShift, [JFPU01, JVALU], 1>; // NOTE: Doesn't exist on Jaguar.
////////////////////////////////////////////////////////////////////////////////
// Vector Extraction instructions.
// Vector insert/extract operations.
////////////////////////////////////////////////////////////////////////////////
def JWritePEXTR : SchedWriteRes<[JFPU0, JFPA, JALU0]> { let Latency = 3; }
def : InstRW<[JWritePEXTR], (instrs MMX_PEXTRWrr,
EXTRACTPSrr, VEXTRACTPSrr,
PEXTRBrr, VPEXTRBrr,
PEXTRDrr, VPEXTRDrr,
PEXTRQrr, VPEXTRQrr,
PEXTRWrr, VPEXTRWrr, PEXTRWrr_REV, VPEXTRWrr_REV)>;
def JWritePEXTRSt : SchedWriteRes<[JFPU1, JSTC, JSAGU]> { let Latency = 3; }
def : InstRW<[JWritePEXTRSt], (instrs EXTRACTPSmr, VEXTRACTPSmr,
PEXTRBmr, VPEXTRBmr,
PEXTRDmr, VPEXTRDmr,
PEXTRQmr, VPEXTRQmr,
PEXTRWmr, VPEXTRWmr)>;
// Insert uses JFPU01 and JVALU.
// NOTE(review): the trailing 1 is presumably the latency - confirm against
// the JWriteResFpuPair definition.
defm : JWriteResFpuPair<WriteVecInsert, [JFPU01, JVALU], 1>;
// Extract to GPR: JFPU0, JFPA and JALU0, latency 3.
def : WriteRes<WriteVecExtract, [JFPU0, JFPA, JALU0]> { let Latency = 3; }
// Extract and store: JFPU1, JSTC and JSAGU, latency 3.
def : WriteRes<WriteVecExtractSt, [JFPU1, JSTC, JSAGU]> { let Latency = 3; }
////////////////////////////////////////////////////////////////////////////////
// SSE42 String instructions.

View File

@ -164,6 +164,16 @@ defm : SLMWriteResPair<WriteBlend, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteMPSAD, [SLM_FPC_RSV0], 7>;
defm : SLMWriteResPair<WritePSADBW, [SLM_FPC_RSV0], 4>;
// Vector insert/extract operations.
// Insert uses SLM_FPC_RSV0.
// NOTE(review): the trailing 1 is presumably the latency - confirm against
// the SLMWriteResPair definition.
defm : SLMWriteResPair<WriteVecInsert, [SLM_FPC_RSV0], 1>;
// Extract to GPR: SLM_FPC_RSV0 only, with WriteRes defaults for everything else.
def : WriteRes<WriteVecExtract, [SLM_FPC_RSV0]>;
// Extract and store: latency 4, 2 uops; SLM_FPC_RSV0 for 1 cycle and
// SLM_MEC_RSV for 2 cycles.
def : WriteRes<WriteVecExtractSt, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
let Latency = 4;
let NumMicroOps = 2;
let ResourceCycles = [1, 2];
}
////////////////////////////////////////////////////////////////////////////////
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////

View File

@ -233,6 +233,19 @@ defm : ZnWriteResFpuPair<WritePSADBW, [ZnFPU0], 3>;
// Vector Shift Operations
defm : ZnWriteResFpuPair<WriteVarVecShift, [ZnFPU12], 1>;
// Vector insert/extract operations.
// Insert uses ZnFPU.
// NOTE(review): the trailing 1 is presumably the latency - confirm against
// the ZnWriteResFpuPair definition.
defm : ZnWriteResFpuPair<WriteVecInsert, [ZnFPU], 1>;
// Extract to GPR: latency 2; ZnFPU12 for 1 cycle and ZnFPU2 for 2 cycles.
// NumMicroOps is not set, so it takes the WriteRes default.
def : WriteRes<WriteVecExtract, [ZnFPU12, ZnFPU2]> {
let Latency = 2;
let ResourceCycles = [1, 2];
}
// Extract and store: latency 5, 2 uops; ZnAGU for 1 cycle, ZnFPU12 for 2
// cycles and ZnFPU2 for 3 cycles.
def : WriteRes<WriteVecExtractSt, [ZnAGU, ZnFPU12, ZnFPU2]> {
let Latency = 5;
let NumMicroOps = 2;
let ResourceCycles = [1, 2, 3];
}
// MOVMSK Instructions.
def : WriteRes<WriteFMOVMSK, [ZnFPU2]>;
def : WriteRes<WriteVecMOVMSK, [ZnFPU2]>;
@ -987,22 +1000,6 @@ def ZnWritePMOVMSKBY : SchedWriteRes<[ZnFPU2]> {
}
def : InstRW<[ZnWritePMOVMSKBY], (instregex "(V|MMX_)?PMOVMSKBYrr")>;
// PEXTR B/W/D/Q.
// r32,x,i.
def ZnWritePEXTRr : SchedWriteRes<[ZnFPU12, ZnFPU2]> {
let Latency = 2;
let ResourceCycles = [1, 2];
}
def : InstRW<[ZnWritePEXTRr], (instregex "(V?)PEXTR(B|W|D|Q)rr", "MMX_PEXTRWrr")>;
def ZnWritePEXTRm : SchedWriteRes<[ZnAGU, ZnFPU12, ZnFPU2]> {
let Latency = 5;
let NumMicroOps = 2;
let ResourceCycles = [1, 2, 3];
}
// m8,x,i.
def : InstRW<[ZnWritePEXTRm], (instregex "(V?)PEXTR(B|W|D|Q)mr")>;
// VPBROADCAST B/W.
// x, m8/16.
def ZnWriteVPBROADCAST128Ld : SchedWriteRes<[ZnAGU, ZnFPU12]> {

View File

@ -2978,7 +2978,7 @@ declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone
define i32 @test_pextrw(x86_mmx %a0) optsize {
; GENERIC-LABEL: test_pextrw:
; GENERIC: # %bb.0:
; GENERIC-NEXT: pextrw $0, %mm0, %eax # sched: [1:1.00]
; GENERIC-NEXT: pextrw $0, %mm0, %eax # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_pextrw:
@ -2993,7 +2993,7 @@ define i32 @test_pextrw(x86_mmx %a0) optsize {
;
; SANDY-LABEL: test_pextrw:
; SANDY: # %bb.0:
; SANDY-NEXT: pextrw $0, %mm0, %eax # sched: [1:1.00]
; SANDY-NEXT: pextrw $0, %mm0, %eax # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pextrw:
@ -3501,9 +3501,9 @@ declare x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx, x86_mmx) nounwind readnone
define i64 @test_pinsrw(x86_mmx %a0, i32 %a1, i16* %a2) optsize {
; GENERIC-LABEL: test_pinsrw:
; GENERIC: # %bb.0:
; GENERIC-NEXT: pinsrw $0, %edi, %mm0 # sched: [1:1.00]
; GENERIC-NEXT: pinsrw $0, %edi, %mm0 # sched: [2:1.00]
; GENERIC-NEXT: movswl (%rsi), %eax # sched: [5:0.50]
; GENERIC-NEXT: pinsrw $1, %eax, %mm0 # sched: [1:1.00]
; GENERIC-NEXT: pinsrw $1, %eax, %mm0 # sched: [2:1.00]
; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@ -3525,9 +3525,9 @@ define i64 @test_pinsrw(x86_mmx %a0, i32 %a1, i16* %a2) optsize {
;
; SANDY-LABEL: test_pinsrw:
; SANDY: # %bb.0:
; SANDY-NEXT: pinsrw $0, %edi, %mm0 # sched: [1:1.00]
; SANDY-NEXT: pinsrw $0, %edi, %mm0 # sched: [2:1.00]
; SANDY-NEXT: movswl (%rsi), %eax # sched: [5:0.50]
; SANDY-NEXT: pinsrw $1, %eax, %mm0 # sched: [1:1.00]
; SANDY-NEXT: pinsrw $1, %eax, %mm0 # sched: [2:1.00]
; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;

View File

@ -1903,7 +1903,7 @@ define i32 @test_pextrw(<8 x i16> %a0, i16 *%a1) {
; GENERIC-LABEL: test_pextrw:
; GENERIC: # %bb.0:
; GENERIC-NEXT: pextrw $3, %xmm0, %eax # sched: [3:1.00]
; GENERIC-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [6:1.00]
; GENERIC-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SLM-LABEL: test_pextrw:
@ -1915,7 +1915,7 @@ define i32 @test_pextrw(<8 x i16> %a0, i16 *%a1) {
; SANDY-SSE-LABEL: test_pextrw:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [3:1.00]
; SANDY-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [6:1.00]
; SANDY-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [5:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_pextrw:

View File

@ -268,9 +268,9 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 2 8 1.00 * pavgb (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 pavgw %mm0, %mm2
# CHECK-NEXT: 2 8 1.00 * pavgw (%rax), %mm2
# CHECK-NEXT: 1 1 1.00 pextrw $1, %mm0, %ecx
# CHECK-NEXT: 1 1 1.00 pinsrw $1, %eax, %mm2
# CHECK-NEXT: 2 6 1.00 * pinsrw $1, (%rax), %mm2
# CHECK-NEXT: 2 3 1.00 pextrw $1, %mm0, %ecx
# CHECK-NEXT: 2 2 1.00 pinsrw $1, %eax, %mm2
# CHECK-NEXT: 2 7 0.50 * pinsrw $1, (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 pmaxsw %mm0, %mm2
# CHECK-NEXT: 2 8 1.00 * pmaxsw (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 pmaxub %mm0, %mm2
@ -331,7 +331,7 @@ xorps (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - 112.00 40.00 54.00 10.00 35.00 33.50 33.50
# CHECK-NEXT: - 112.00 41.00 55.50 10.00 34.50 33.50 33.50
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
@ -409,9 +409,9 @@ xorps (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pavgb (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - pavgw %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pavgw (%rax), %mm2
# CHECK-NEXT: - - - - - 1.00 - - pextrw $1, %mm0, %ecx
# CHECK-NEXT: - - - - - 1.00 - - pinsrw $1, %eax, %mm2
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 pinsrw $1, (%rax), %mm2
# CHECK-NEXT: - - 1.00 0.50 - 0.50 - - pextrw $1, %mm0, %ecx
# CHECK-NEXT: - - - 0.50 - 1.50 - - pinsrw $1, %eax, %mm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pinsrw $1, (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - pmaxsw %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pmaxsw (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - pmaxub %mm0, %mm2

View File

@ -188,7 +188,7 @@ roundss $1, (%rax), %xmm2
# CHECK-NEXT: 4 5 1.00 * pextrd $1, %xmm0, (%rax)
# CHECK-NEXT: 2 3 1.00 pextrq $1, %xmm0, %rcx
# CHECK-NEXT: 4 5 1.00 * pextrq $1, %xmm0, (%rax)
# CHECK-NEXT: 3 6 1.00 * pextrw $1, %xmm0, (%rax)
# CHECK-NEXT: 3 5 1.00 * pextrw $1, %xmm0, (%rax)
# CHECK-NEXT: 1 5 1.00 phminposuw %xmm0, %xmm2
# CHECK-NEXT: 2 11 1.00 * phminposuw (%rax), %xmm2
# CHECK-NEXT: 2 2 1.00 pinsrb $1, %eax, %xmm1
@ -264,7 +264,7 @@ roundss $1, (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - 26.00 47.00 5.00 53.00 25.00 25.00
# CHECK-NEXT: - - 26.00 47.50 5.00 52.50 24.50 24.50
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
@ -301,7 +301,7 @@ roundss $1, (%rax), %xmm2
# CHECK-NEXT: - - 1.00 0.50 1.00 0.50 0.50 0.50 pextrd $1, %xmm0, (%rax)
# CHECK-NEXT: - - 1.00 0.50 - 0.50 - - pextrq $1, %xmm0, %rcx
# CHECK-NEXT: - - 1.00 0.50 1.00 0.50 0.50 0.50 pextrq $1, %xmm0, (%rax)
# CHECK-NEXT: - - - - 1.00 1.00 1.00 1.00 pextrw $1, %xmm0, (%rax)
# CHECK-NEXT: - - - 0.50 1.00 0.50 0.50 0.50 pextrw $1, %xmm0, (%rax)
# CHECK-NEXT: - - 1.00 - - - - - phminposuw %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 phminposuw (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 1.50 - - pinsrb $1, %eax, %xmm1