mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-02-26 05:18:46 +00:00
[X86] Use vector widening to support zero extend from i1 when the dest type is not 512-bits and vlx is not enabled.
Previously we used a wider element type and truncated. But it's more efficient to keep the element type and drop unused elements. If BWI isn't supported and we have an i16 or i8 type, we'll extend it to be i32 and still use a truncate. llvm-svn: 319728
This commit is contained in:
parent
891fc7f37a
commit
276c770e57
@ -16139,18 +16139,40 @@ static SDValue LowerZERO_EXTEND_AVX512(SDValue Op,
|
||||
if (InVT.getVectorElementType() != MVT::i1)
|
||||
return SDValue();
|
||||
|
||||
// Extend VT if the target is 256 or 128bit vector and VLX is not supported.
|
||||
// Extend VT if the scalar type is i8/i16 and BWI is not supported.
|
||||
MVT ExtVT = VT;
|
||||
if (!VT.is512BitVector() && !Subtarget.hasVLX())
|
||||
ExtVT = MVT::getVectorVT(MVT::getIntegerVT(512/NumElts), NumElts);
|
||||
if (!Subtarget.hasBWI() &&
|
||||
(VT.getVectorElementType().getSizeInBits() <= 16))
|
||||
ExtVT = MVT::getVectorVT(MVT::i32, NumElts);
|
||||
|
||||
SDValue One = DAG.getConstant(1, DL, ExtVT);
|
||||
SDValue Zero = getZeroVector(ExtVT, Subtarget, DAG, DL);
|
||||
// Widen to 512-bits if VLX is not supported.
|
||||
MVT WideVT = ExtVT;
|
||||
if (!VT.is512BitVector() && !Subtarget.hasVLX()) {
|
||||
NumElts *= 512 / ExtVT.getSizeInBits();
|
||||
InVT = MVT::getVectorVT(MVT::i1, NumElts);
|
||||
In = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT, DAG.getUNDEF(InVT),
|
||||
In, DAG.getIntPtrConstant(0, DL));
|
||||
WideVT = MVT::getVectorVT(ExtVT.getVectorElementType(),
|
||||
NumElts);
|
||||
}
|
||||
|
||||
SDValue SelectedVal = DAG.getSelect(DL, ExtVT, In, One, Zero);
|
||||
if (VT == ExtVT)
|
||||
return SelectedVal;
|
||||
return DAG.getNode(X86ISD::VTRUNC, DL, VT, SelectedVal);
|
||||
SDValue One = DAG.getConstant(1, DL, WideVT);
|
||||
SDValue Zero = getZeroVector(WideVT, Subtarget, DAG, DL);
|
||||
|
||||
SDValue SelectedVal = DAG.getSelect(DL, WideVT, In, One, Zero);
|
||||
|
||||
// Truncate if we had to extend i16/i8 above.
|
||||
if (VT != ExtVT) {
|
||||
WideVT = MVT::getVectorVT(VT.getVectorElementType(), NumElts);
|
||||
SelectedVal = DAG.getNode(X86ISD::VTRUNC, DL, WideVT, SelectedVal);
|
||||
}
|
||||
|
||||
// Extract back to 128/256-bit if we widened.
|
||||
if (WideVT != VT)
|
||||
SelectedVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SelectedVal,
|
||||
DAG.getIntPtrConstant(0, DL));
|
||||
|
||||
return SelectedVal;
|
||||
}
|
||||
|
||||
static SDValue LowerANY_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
|
||||
|
@ -1862,14 +1862,12 @@ define <16 x double> @ubto16f64(<16 x i32> %a) {
|
||||
; NOVL: # %bb.0:
|
||||
; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; NOVL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
|
||||
; NOVL-NEXT: movq {{.*}}(%rip), %rax
|
||||
; NOVL-NEXT: vpbroadcastq %rax, %zmm0 {%k1} {z}
|
||||
; NOVL-NEXT: vpmovqd %zmm0, %ymm0
|
||||
; NOVL-NEXT: vcvtudq2pd %ymm0, %zmm0
|
||||
; NOVL-NEXT: movl {{.*}}(%rip), %eax
|
||||
; NOVL-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z}
|
||||
; NOVL-NEXT: vcvtdq2pd %ymm0, %zmm0
|
||||
; NOVL-NEXT: kshiftrw $8, %k1, %k1
|
||||
; NOVL-NEXT: vpbroadcastq %rax, %zmm1 {%k1} {z}
|
||||
; NOVL-NEXT: vpmovqd %zmm1, %ymm1
|
||||
; NOVL-NEXT: vcvtudq2pd %ymm1, %zmm1
|
||||
; NOVL-NEXT: vpbroadcastd %eax, %zmm1 {%k1} {z}
|
||||
; NOVL-NEXT: vcvtdq2pd %ymm1, %zmm1
|
||||
; NOVL-NEXT: retq
|
||||
;
|
||||
; VL-LABEL: ubto16f64:
|
||||
@ -1894,10 +1892,8 @@ define <8 x float> @ubto8f32(<8 x i32> %a) {
|
||||
; NOVL-NEXT: # kill: %ymm0<def> %ymm0<kill> %zmm0<def>
|
||||
; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; NOVL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
|
||||
; NOVL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
|
||||
; NOVL-NEXT: vpmovqd %zmm0, %ymm0
|
||||
; NOVL-NEXT: vcvtudq2ps %zmm0, %zmm0
|
||||
; NOVL-NEXT: # kill: %ymm0<def> %ymm0<kill> %zmm0<kill>
|
||||
; NOVL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
|
||||
; NOVL-NEXT: vcvtdq2ps %ymm0, %ymm0
|
||||
; NOVL-NEXT: retq
|
||||
;
|
||||
; VL-LABEL: ubto8f32:
|
||||
@ -1918,9 +1914,8 @@ define <8 x double> @ubto8f64(<8 x i32> %a) {
|
||||
; NOVL-NEXT: # kill: %ymm0<def> %ymm0<kill> %zmm0<def>
|
||||
; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; NOVL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
|
||||
; NOVL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
|
||||
; NOVL-NEXT: vpmovqd %zmm0, %ymm0
|
||||
; NOVL-NEXT: vcvtudq2pd %ymm0, %zmm0
|
||||
; NOVL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
|
||||
; NOVL-NEXT: vcvtdq2pd %ymm0, %zmm0
|
||||
; NOVL-NEXT: retq
|
||||
;
|
||||
; VL-LABEL: ubto8f64:
|
||||
|
@ -109,9 +109,8 @@ define <4 x i32> @ext_i4_4i32(i4 %a0) {
|
||||
; AVX512F-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
||||
; AVX512F-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
||||
; AVX512F-NEXT: kmovw %eax, %k1
|
||||
; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
|
||||
; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
|
||||
; AVX512F-NEXT: # kill: %xmm0<def> %xmm0<kill> %ymm0<kill>
|
||||
; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
|
||||
; AVX512F-NEXT: # kill: %xmm0<def> %xmm0<kill> %zmm0<kill>
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
@ -166,8 +165,9 @@ define <8 x i16> @ext_i8_8i16(i8 %a0) {
|
||||
; AVX512F-LABEL: ext_i8_8i16:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: kmovw %edi, %k1
|
||||
; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
|
||||
; AVX512F-NEXT: vpmovqw %zmm0, %xmm0
|
||||
; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
|
||||
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; AVX512F-NEXT: # kill: %xmm0<def> %xmm0<kill> %ymm0<kill>
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
@ -372,8 +372,8 @@ define <8 x i32> @ext_i8_8i32(i8 %a0) {
|
||||
; AVX512F-LABEL: ext_i8_8i32:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: kmovw %edi, %k1
|
||||
; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
|
||||
; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
|
||||
; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
|
||||
; AVX512F-NEXT: # kill: %ymm0<def> %ymm0<kill> %zmm0<kill>
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: ext_i8_8i32:
|
||||
|
Loading…
x
Reference in New Issue
Block a user