diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index c4c090b5c95..c9942c5f4ac 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -375,7 +375,9 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::VMOVAPSYrr,      X86::VMOVAPSYmr,    TB_FOLDED_STORE | TB_ALIGN_32 },
     { X86::VMOVDQAYrr,      X86::VMOVDQAYmr,    TB_FOLDED_STORE | TB_ALIGN_32 },
     { X86::VMOVUPDYrr,      X86::VMOVUPDYmr,    TB_FOLDED_STORE },
-    { X86::VMOVUPSYrr,      X86::VMOVUPSYmr,    TB_FOLDED_STORE }
+    { X86::VMOVUPSYrr,      X86::VMOVUPSYmr,    TB_FOLDED_STORE },
+    // AVX-512 foldable instructions
+    { X86::VMOVPDI2DIZrr,   X86::VMOVPDI2DIZmr, TB_FOLDED_STORE }
   };
 
   for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) {
@@ -581,6 +583,14 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::TZCNT16rr,       X86::TZCNT16rm,     0 },
     { X86::TZCNT32rr,       X86::TZCNT32rm,     0 },
     { X86::TZCNT64rr,       X86::TZCNT64rm,     0 },
+
+    // AVX-512 foldable instructions
+    { X86::VMOV64toPQIZrr,  X86::VMOVQI2PQIZrm, 0 },
+    { X86::VMOVDI2SSZrr,    X86::VMOVDI2SSZrm,  0 },
+    { X86::VMOVDQA32rr,     X86::VMOVDQA32rm,   TB_ALIGN_64 },
+    { X86::VMOVDQA64rr,     X86::VMOVDQA64rm,   TB_ALIGN_64 },
+    { X86::VMOVDQU32rr,     X86::VMOVDQU32rm,   0 },
+    { X86::VMOVDQU64rr,     X86::VMOVDQU64rm,   0 },
   };
 
   for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
@@ -1180,12 +1190,35 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::PEXT64rr,        X86::PEXT64rm,      0 },
 
     // AVX-512 foldable instructions
+    { X86::VPADDDZrr,       X86::VPADDDZrm,     0 },
+    { X86::VPADDQZrr,       X86::VPADDQZrm,     0 },
+    { X86::VADDPSZrr,       X86::VADDPSZrm,     0 },
+    { X86::VADDPDZrr,       X86::VADDPDZrm,     0 },
+    { X86::VSUBPSZrr,       X86::VSUBPSZrm,     0 },
+    { X86::VSUBPDZrr,       X86::VSUBPDZrm,     0 },
+    { X86::VMULPSZrr,       X86::VMULPSZrm,     0 },
+    { X86::VMULPDZrr,       X86::VMULPDZrm,     0 },
+    { X86::VDIVPSZrr,       X86::VDIVPSZrm,     0 },
+    { X86::VDIVPDZrr,       X86::VDIVPDZrm,     0 },
+    { X86::VMINPSZrr,       X86::VMINPSZrm,     0 },
+    { X86::VMINPDZrr,       X86::VMINPDZrm,     0 },
+    { X86::VMAXPSZrr,       X86::VMAXPSZrm,     0 },
+    { X86::VMAXPDZrr,       X86::VMAXPDZrm,     0 },
     { X86::VPERMPDZri,      X86::VPERMPDZmi,    0 },
     { X86::VPERMPSZrr,      X86::VPERMPSZrm,    0 },
     { X86::VPERMI2Drr,      X86::VPERMI2Drm,    0 },
     { X86::VPERMI2Qrr,      X86::VPERMI2Qrm,    0 },
     { X86::VPERMI2PSrr,     X86::VPERMI2PSrm,   0 },
     { X86::VPERMI2PDrr,     X86::VPERMI2PDrm,   0 },
+    { X86::VPSLLVDZrr,      X86::VPSLLVDZrm,    0 },
+    { X86::VPSLLVQZrr,      X86::VPSLLVQZrm,    0 },
+    { X86::VPSRAVDZrr,      X86::VPSRAVDZrm,    0 },
+    { X86::VPSRLVDZrr,      X86::VPSRLVDZrm,    0 },
+    { X86::VPSRLVQZrr,      X86::VPSRLVQZrm,    0 },
+    { X86::VSHUFPDZrri,     X86::VSHUFPDZrmi,   0 },
+    { X86::VSHUFPSZrri,     X86::VSHUFPSZrmi,   0 },
+    { X86::VALIGNQrri,      X86::VALIGNQrmi,    0 },
+    { X86::VALIGNDrri,      X86::VALIGNDrmi,    0 },
   };
 
   for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
@@ -4010,6 +4043,8 @@ static bool hasPartialRegUpdate(unsigned Opcode) {
   case X86::Int_VCVTSD2SSrr:
   case X86::VCVTSS2SDrr:
   case X86::Int_VCVTSS2SDrr:
+  case X86::VCVTSD2SSZrr:
+  case X86::VCVTSS2SDZrr:
   case X86::VRCPSSr:
   case X86::VROUNDSDr:
   case X86::VROUNDSDr_Int:
@@ -5064,6 +5099,15 @@ bool X86InstrInfo::isHighLatencyDef(int opc) const {
   case X86::VSQRTSSm:
   case X86::VSQRTSSm_Int:
   case X86::VSQRTSSr:
+
+  case X86::VGATHERQPSZrm:
+  case X86::VGATHERQPDZrm:
+  case X86::VGATHERDPDZrm:
+  case X86::VGATHERDPSZrm:
+  case X86::VPGATHERQDZrm:
+  case X86::VPGATHERQQZrm:
+  case X86::VPGATHERDDZrm:
+  case X86::VPGATHERDQZrm:
     return true;
   }
 }
diff --git a/test/CodeGen/X86/avx512-gather-scatter-intrin.ll b/test/CodeGen/X86/avx512-gather-scatter-intrin.ll
index 2b87d44fc57..0321e950ef8 100644
--- a/test/CodeGen/X86/avx512-gather-scatter-intrin.ll
+++ b/test/CodeGen/X86/avx512-gather-scatter-intrin.ll
@@ -10,7 +10,7 @@ declare void @llvm.x86.avx512.scatter.qps.mask.512 (i8*, i8, <8 x i64>, <8 x flo
 declare <8 x double> @llvm.x86.avx512.gather.qpd.mask.512 (<8 x double>, i8, <8 x i64>, i8*, i32)
 declare void @llvm.x86.avx512.scatter.qpd.mask.512 (i8*, i8, <8 x i64>, <8 x double>, i32)
 
-;CHECK: gather_mask_dps
+;CHECK-LABEL: gather_mask_dps
 ;CHECK: kmovw
 ;CHECK: vgatherdps
 ;CHECK: vpadd
@@ -23,7 +23,7 @@ define void @gather_mask_dps(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8*
   ret void
 }
 
-;CHECK: gather_mask_dpd
+;CHECK-LABEL: gather_mask_dpd
 ;CHECK: kmovw
 ;CHECK: vgatherdpd
 ;CHECK: vpadd
@@ -36,7 +36,7 @@ define void @gather_mask_dpd(<8 x i32> %ind, <8 x double> %src, i8 %mask, i8* %b
   ret void
 }
 
-;CHECK: gather_mask_qps
+;CHECK-LABEL: gather_mask_qps
 ;CHECK: kmovw
 ;CHECK: vgatherqps
 ;CHECK: vpadd
@@ -49,7 +49,7 @@ define void @gather_mask_qps(<8 x i64> %ind, <8 x float> %src, i8 %mask, i8* %ba
   ret void
 }
 
-;CHECK: gather_mask_qpd
+;CHECK-LABEL: gather_mask_qpd
 ;CHECK: kmovw
 ;CHECK: vgatherqpd
 ;CHECK: vpadd
@@ -74,7 +74,7 @@ declare void @llvm.x86.avx512.scatter.qpi.mask.512 (i8*, i8, <8 x i64>, <8 x i32
 declare <8 x i64> @llvm.x86.avx512.gather.qpq.mask.512 (<8 x i64>, i8, <8 x i64>, i8*, i32)
 declare void @llvm.x86.avx512.scatter.qpq.mask.512 (i8*, i8, <8 x i64>, <8 x i64>, i32)
 
-;CHECK: gather_mask_dd
+;CHECK-LABEL: gather_mask_dd
 ;CHECK: kmovw
 ;CHECK: vpgatherdd
 ;CHECK: vpadd
@@ -87,7 +87,7 @@ define void @gather_mask_dd(<16 x i32> %ind, <16 x i32> %src, i16 %mask, i8* %ba
   ret void
 }
 
-;CHECK: gather_mask_qd
+;CHECK-LABEL: gather_mask_qd
 ;CHECK: kmovw
 ;CHECK: vpgatherqd
 ;CHECK: vpadd
@@ -100,7 +100,7 @@ define void @gather_mask_qd(<8 x i64> %ind, <8 x i32> %src, i8 %mask, i8* %base,
   ret void
 }
 
-;CHECK: gather_mask_qq
+;CHECK-LABEL: gather_mask_qq
 ;CHECK: kmovw
 ;CHECK: vpgatherqq
 ;CHECK: vpadd
@@ -113,7 +113,7 @@ define void @gather_mask_qq(<8 x i64> %ind, <8 x i64> %src, i8 %mask, i8* %base,
   ret void
 }
 
-;CHECK: gather_mask_dq
+;CHECK-LABEL: gather_mask_dq
 ;CHECK: kmovw
 ;CHECK: vpgatherdq
 ;CHECK: vpadd
@@ -135,7 +135,7 @@ declare void @llvm.x86.avx512.scatter.qps.512 (i8*, <8 x i64>, <8 x float>, i32)
 declare <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x i64>, i8*, i32)
 declare void @llvm.x86.avx512.scatter.qpd.512 (i8*, <8 x i64>, <8 x double>, i32)
 
-;CHECK: gather_dps
+;CHECK-LABEL: gather_dps
 ;CHECK: kxnorw
 ;CHECK: vgatherdps
 ;CHECK: vscatterdps
@@ -147,7 +147,7 @@ define void @gather_dps(<16 x i32> %ind, i8* %base, i8* %stbuf) {
   ret void
 }
 
-;CHECK: gather_qps
+;CHECK-LABEL: gather_qps
 ;CHECK: kxnorw
 ;CHECK: vgatherqps
 ;CHECK: vscatterqps
@@ -159,7 +159,7 @@ define void @gather_qps(<8 x i64> %ind, i8* %base, i8* %stbuf) {
   ret void
 }
 
-;CHECK: gather_qpd
+;CHECK-LABEL: gather_qpd
 ;CHECK: kxnorw
 ;CHECK: vgatherqpd
 ;CHECK: vpadd
@@ -184,7 +184,7 @@ declare void @llvm.x86.avx512.scatter.qpi.512 (i8*, <8 x i64>, <8 x i32>, i32)
 declare <8 x i64> @llvm.x86.avx512.gather.qpq.512 (<8 x i64>, i8*, i32)
 declare void @llvm.x86.avx512.scatter.qpq.512 (i8*, <8 x i64>, <8 x i64>, i32)
 
-;CHECK: gather_dpi
+;CHECK-LABEL: gather_dpi
 ;CHECK: kxnorw
 ;CHECK: vpgatherdd
 ;CHECK: vpscatterdd
@@ -196,7 +196,8 @@ define void @gather_dpi(<16 x i32> %ind, i8* %base, i8* %stbuf) {
   ret void
 }
 
-;CHECK: gather_qpq
+;CHECK-LABEL: gather_qpq
+;CHECK: vpxord %zmm
 ;CHECK: kxnorw
 ;CHECK: vpgatherqq
 ;CHECK: vpadd
@@ -209,7 +210,8 @@ define void @gather_qpq(<8 x i64> %ind, i8* %base, i8* %stbuf) {
   ret void
 }
 
-;CHECK: gather_qpi
+;CHECK-LABEL: gather_qpi
+;CHECK: vpxor %ymm
 ;CHECK: kxnorw
 ;CHECK: vpgatherqd
 ;CHECK: vpadd