[X86] Change the signature of the AVX512 packed fp compare intrinsics to return vXi1 mask. Make bitcasts to scalar explicit in IR
Summary: This is the clang equivalent of r324827

Reviewers: zvi, delena, RKSimon, spatel

Reviewed By: RKSimon

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D43143

llvm-svn: 324828
parent 4dccffc84a
commit a57d64e30f
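Net effect of the change: the AVX512 packed fp compare builtins now lower to intrinsics that return a <2/4/8/16 x i1> vector, with any write-mask applied as an explicit vector `and` and the conversion to the scalar __mmask8/__mmask16 type emitted as an explicit bitcast, instead of the intrinsic returning the scalar mask directly. For reference, what such a builtin computes at the source level (an illustrative model, not code from this patch; predicate 0 is _CMP_EQ_OQ):

    // Illustrative reference model: bit i of the result is set iff lane i of
    // a compares true against lane i of b. Predicate 0 (_CMP_EQ_OQ) is an
    // ordered, quiet equality test, matching plain `==` on floats (any NaN
    // operand compares false).
    #include <cstdint>
    static uint16_t cmp_ps512_eq_model(const float a[16], const float b[16]) {
      uint16_t mask = 0;
      for (int i = 0; i != 16; ++i)
        if (a[i] == b[i])
          mask |= uint16_t(1u << i);
      return mask; // what _mm512_cmp_ps_mask(a, b, 0) should yield
    }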
@@ -8060,6 +8060,29 @@ static Value *EmitX86Select(CodeGenFunction &CGF,
   return CGF.Builder.CreateSelect(Mask, Op0, Op1);
 }
 
+static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp,
+                                         unsigned NumElts, Value *MaskIn) {
+  if (MaskIn) {
+    const auto *C = dyn_cast<Constant>(MaskIn);
+    if (!C || !C->isAllOnesValue())
+      Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, MaskIn, NumElts));
+  }
+
+  if (NumElts < 8) {
+    uint32_t Indices[8];
+    for (unsigned i = 0; i != NumElts; ++i)
+      Indices[i] = i;
+    for (unsigned i = NumElts; i != 8; ++i)
+      Indices[i] = i % NumElts + NumElts;
+    Cmp = CGF.Builder.CreateShuffleVector(
+        Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
+  }
+
+  return CGF.Builder.CreateBitCast(Cmp,
+                                   IntegerType::get(CGF.getLLVMContext(),
+                                                    std::max(NumElts, 8U)));
+}
+
 static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
                                    bool Signed, ArrayRef<Value *> Ops) {
   assert((Ops.size() == 2 || Ops.size() == 4) &&
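The new EmitX86MaskedCompareResult helper centralizes everything that happens after a vXi1 compare: AND with the incoming mask (skipped when the mask is a constant all-ones, since that `and` would be a no-op), widening of sub-byte masks to eight lanes by shuffling in zeros from a null vector (shuffle indices >= NumElts select the second operand), and the final bitcast to the scalar integer the builtin returns. A scalar model of that path (illustrative, for NumElts <= 8):

    #include <cstdint>
    // Scalar model of EmitX86MaskedCompareResult for NumElts <= 8: lanes[i]
    // holds the i1 compare result for lane i, maskIn the caller's mask bits.
    static uint8_t masked_compare_result_model(const bool lanes[],
                                               unsigned NumElts,
                                               uint8_t maskIn) {
      uint8_t out = 0;
      for (unsigned i = 0; i != NumElts; ++i)
        if (lanes[i] && ((maskIn >> i) & 1)) // compare result ANDed with mask
          out |= uint8_t(1u << i);
      // Lanes NumElts..7 come from the null vector, so after the bitcast the
      // high bits of the scalar result are guaranteed to be zero.
      return out;
    }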
@@ -8087,24 +8110,11 @@ static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
     Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
   }
 
-  if (Ops.size() == 4) {
-    const auto *C = dyn_cast<Constant>(Ops[3]);
-    if (!C || !C->isAllOnesValue())
-      Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, Ops[3], NumElts));
-  }
-
-  if (NumElts < 8) {
-    uint32_t Indices[8];
-    for (unsigned i = 0; i != NumElts; ++i)
-      Indices[i] = i;
-    for (unsigned i = NumElts; i != 8; ++i)
-      Indices[i] = i % NumElts + NumElts;
-    Cmp = CGF.Builder.CreateShuffleVector(
-        Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
-  }
-  return CGF.Builder.CreateBitCast(Cmp,
-                                   IntegerType::get(CGF.getLLVMContext(),
-                                                    std::max(NumElts, 8U)));
+  Value *MaskIn = nullptr;
+  if (Ops.size() == 4)
+    MaskIn = Ops[3];
+
+  return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn);
 }
 
 static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) {
@@ -8882,6 +8892,43 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
     return Builder.CreateExtractValue(Call, 1);
   }
 
+  case X86::BI__builtin_ia32_cmpps128_mask:
+  case X86::BI__builtin_ia32_cmpps256_mask:
+  case X86::BI__builtin_ia32_cmpps512_mask:
+  case X86::BI__builtin_ia32_cmppd128_mask:
+  case X86::BI__builtin_ia32_cmppd256_mask:
+  case X86::BI__builtin_ia32_cmppd512_mask: {
+    unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
+    Value *MaskIn = Ops[3];
+    Ops.erase(&Ops[3]);
+
+    Intrinsic::ID ID;
+    switch (BuiltinID) {
+    default: llvm_unreachable("Unsupported intrinsic!");
+    case X86::BI__builtin_ia32_cmpps128_mask:
+      ID = Intrinsic::x86_avx512_mask_cmp_ps_128;
+      break;
+    case X86::BI__builtin_ia32_cmpps256_mask:
+      ID = Intrinsic::x86_avx512_mask_cmp_ps_256;
+      break;
+    case X86::BI__builtin_ia32_cmpps512_mask:
+      ID = Intrinsic::x86_avx512_mask_cmp_ps_512;
+      break;
+    case X86::BI__builtin_ia32_cmppd128_mask:
+      ID = Intrinsic::x86_avx512_mask_cmp_pd_128;
+      break;
+    case X86::BI__builtin_ia32_cmppd256_mask:
+      ID = Intrinsic::x86_avx512_mask_cmp_pd_256;
+      break;
+    case X86::BI__builtin_ia32_cmppd512_mask:
+      ID = Intrinsic::x86_avx512_mask_cmp_pd_512;
+      break;
+    }
+
+    Value *Cmp = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
+    return EmitX86MaskedCompareResult(*this, Cmp, NumElts, MaskIn);
+  }
+
   // SSE packed comparison intrinsics
   case X86::BI__builtin_ia32_cmpeqps:
   case X86::BI__builtin_ia32_cmpeqpd:
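In EmitX86BuiltinExpr the mask operand is peeled off with Ops.erase(&Ops[3]) because the updated intrinsics no longer take it; the compare intrinsic is then called on the remaining operands and the result is routed through the shared helper above. At the user level these six builtins sit behind the _mm*_cmp_p{s,d}_mask intrinsics exercised by the tests below; a usage sketch (assumes -mavx512f -mavx512vl; the helper names are illustrative):

    #include <immintrin.h>
    // Usage sketch: the comparison predicate must be a compile-time constant;
    // _CMP_EQ_OQ is predicate 0, the value used throughout the tests below.
    __mmask8 eq_mask_pd128(__m128d a, __m128d b) {
      return _mm_cmp_pd_mask(a, b, _CMP_EQ_OQ); // only bits 0-1 can be set
    }
    __mmask16 eq_mask_ps512(__m512 a, __m512 b) {
      return _mm512_cmp_ps_mask(a, b, _CMP_EQ_OQ);
    }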
@@ -1001,49 +1001,53 @@ __m512 test_mm512_unpacklo_ps(__m512 a, __m512 b)
 
 __mmask16 test_mm512_cmp_round_ps_mask(__m512 a, __m512 b) {
   // CHECK-LABEL: @test_mm512_cmp_round_ps_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.ps.512
+  // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512
   return _mm512_cmp_round_ps_mask(a, b, 0, _MM_FROUND_CUR_DIRECTION);
 }
 
 __mmask16 test_mm512_mask_cmp_round_ps_mask(__mmask16 m, __m512 a, __m512 b) {
   // CHECK-LABEL: @test_mm512_mask_cmp_round_ps_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.ps.512
+  // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512
+  // CHECK: and <16 x i1> [[CMP]], {{.*}}
   return _mm512_mask_cmp_round_ps_mask(m, a, b, 0, _MM_FROUND_CUR_DIRECTION);
 }
 
 __mmask16 test_mm512_cmp_ps_mask(__m512 a, __m512 b) {
   // CHECK-LABEL: @test_mm512_cmp_ps_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.ps.512
+  // CHECK: call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512
   return _mm512_cmp_ps_mask(a, b, 0);
 }
 
 __mmask16 test_mm512_mask_cmp_ps_mask(__mmask16 m, __m512 a, __m512 b) {
   // CHECK-LABEL: @test_mm512_mask_cmp_ps_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.ps.512
+  // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512
+  // CHECK: and <16 x i1> [[CMP]], {{.*}}
   return _mm512_mask_cmp_ps_mask(m, a, b, 0);
 }
 
 __mmask8 test_mm512_cmp_round_pd_mask(__m512d a, __m512d b) {
   // CHECK-LABEL: @test_mm512_cmp_round_pd_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.pd.512
+  // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512
   return _mm512_cmp_round_pd_mask(a, b, 0, _MM_FROUND_CUR_DIRECTION);
 }
 
 __mmask8 test_mm512_mask_cmp_round_pd_mask(__mmask8 m, __m512d a, __m512d b) {
   // CHECK-LABEL: @test_mm512_mask_cmp_round_pd_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.pd.512
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
   return _mm512_mask_cmp_round_pd_mask(m, a, b, 0, _MM_FROUND_CUR_DIRECTION);
 }
 
 __mmask8 test_mm512_cmp_pd_mask(__m512d a, __m512d b) {
   // CHECK-LABEL: @test_mm512_cmp_pd_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.pd.512
+  // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512
   return _mm512_cmp_pd_mask(a, b, 0);
 }
 
 __mmask8 test_mm512_mask_cmp_pd_mask(__mmask8 m, __m512d a, __m512d b) {
   // CHECK-LABEL: @test_mm512_mask_cmp_pd_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.pd.512
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
   return _mm512_mask_cmp_pd_mask(m, a, b, 0);
 }
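The masked test variants now check for an explicit `and <16 x i1>` (resp. `<8 x i1>`) between the compare result and the incoming mask, rather than assuming the intrinsic applies the mask internally. Observable behavior is unchanged: write-masking a compare is the same as comparing and then ANDing the mask bits, as this sketch of the identity illustrates (assumes AVX512F; illustrative helper):

    #include <immintrin.h>
    #include <cassert>
    // The write-masked compare equals compare-then-AND with the mask.
    void check_masked_identity(__mmask16 m, __m512 a, __m512 b) {
      __mmask16 masked   = _mm512_mask_cmp_ps_mask(m, a, b, _CMP_EQ_OQ);
      __mmask16 unmasked = _mm512_cmp_ps_mask(a, b, _CMP_EQ_OQ);
      assert(masked == (__mmask16)(unmasked & m));
    }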
@@ -1049,49 +1049,53 @@ __m128i test_mm_maskz_xor_epi64 (__mmask8 __U, __m128i __A, __m128i __B) {
 
 __mmask8 test_mm256_cmp_ps_mask(__m256 __A, __m256 __B) {
   // CHECK-LABEL: @test_mm256_cmp_ps_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.ps.256
+  // CHECK: call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256
   return (__mmask8)_mm256_cmp_ps_mask(__A, __B, 0);
 }
 
 __mmask8 test_mm256_mask_cmp_ps_mask(__mmask8 m, __m256 __A, __m256 __B) {
   // CHECK-LABEL: @test_mm256_mask_cmp_ps_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.ps.256
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.x86.avx512.mask.cmp.ps.256
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
   return _mm256_mask_cmp_ps_mask(m, __A, __B, 0);
 }
 
 __mmask8 test_mm_cmp_ps_mask(__m128 __A, __m128 __B) {
   // CHECK-LABEL: @test_mm_cmp_ps_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.ps.128
+  // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128
   return (__mmask8)_mm_cmp_ps_mask(__A, __B, 0);
 }
 
 __mmask8 test_mm_mask_cmp_ps_mask(__mmask8 m, __m128 __A, __m128 __B) {
   // CHECK-LABEL: @test_mm_mask_cmp_ps_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.ps.128
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.x86.avx512.mask.cmp.ps.128
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
   return _mm_mask_cmp_ps_mask(m, __A, __B, 0);
 }
 
 __mmask8 test_mm256_cmp_pd_mask(__m256d __A, __m256d __B) {
   // CHECK-LABEL: @test_mm256_cmp_pd_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.pd.256
+  // CHECK: call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256
   return (__mmask8)_mm256_cmp_pd_mask(__A, __B, 0);
 }
 
 __mmask8 test_mm256_mask_cmp_pd_mask(__mmask8 m, __m256d __A, __m256d __B) {
   // CHECK-LABEL: @test_mm256_mask_cmp_pd_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.pd.256
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.x86.avx512.mask.cmp.pd.256
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
   return _mm256_mask_cmp_pd_mask(m, __A, __B, 0);
 }
 
 __mmask8 test_mm_cmp_pd_mask(__m128d __A, __m128d __B) {
   // CHECK-LABEL: @test_mm_cmp_pd_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.pd.128
+  // CHECK: call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128
   return (__mmask8)_mm_cmp_pd_mask(__A, __B, 0);
 }
 
 __mmask8 test_mm_mask_cmp_pd_mask(__mmask8 m, __m128d __A, __m128d __B) {
   // CHECK-LABEL: @test_mm_mask_cmp_pd_mask
-  // CHECK: @llvm.x86.avx512.mask.cmp.pd.128
+  // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.x86.avx512.mask.cmp.pd.128
+  // CHECK: and <2 x i1> [[CMP]], {{.*}}
   return _mm_mask_cmp_pd_mask(m, __A, __B, 0);
 }
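For the 128-bit forms only the low two (pd) or four (ps) lanes exist, so the <2 x i1>/<4 x i1> compare results are zero-padded to eight lanes before the bitcast, and the unused high bits of the returned __mmask8 are therefore guaranteed clear. A sketch of that guarantee (assumes -mavx512f -mavx512vl; illustrative):

    #include <immintrin.h>
    #include <cassert>
    void check_high_bits_clear(__m128d a, __m128d b) {
      __mmask8 k = _mm_cmp_pd_mask(a, b, _CMP_EQ_OQ); // 2 meaningful lanes
      assert((k & 0xFC) == 0); // bits 2-7 were shuffled in from a zero vector
    }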