mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 04:40:38 +00:00
[ARM,CDE] Implement predicated Q-register CDE intrinsics
Summary: This patch implements the following CDE intrinsics: T __arm_vcx1q_m(int coproc, T inactive, uint32_t imm, mve_pred_t p); T __arm_vcx2q_m(int coproc, T inactive, U n, uint32_t imm, mve_pred_t p); T __arm_vcx3q_m(int coproc, T inactive, U n, V m, uint32_t imm, mve_pred_t p); T __arm_vcx1qa_m(int coproc, T acc, uint32_t imm, mve_pred_t p); T __arm_vcx2qa_m(int coproc, T acc, U n, uint32_t imm, mve_pred_t p); T __arm_vcx3qa_m(int coproc, T acc, U n, V m, uint32_t imm, mve_pred_t p); The intrinsics are not part of the released ACLE spec, but internally at Arm we have reached consensus to add them to the next ACLE release. Reviewers: simon_tatham, MarkMurrayARM, ostannard, dmgreen Reviewed By: simon_tatham Subscribers: kristof.beyls, hiraditya, danielkiss, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D76610
This commit is contained in:
parent
a95e919d47
commit
3301f6f1d2
@ -1332,6 +1332,17 @@ multiclass CDEVCXVecIntrinsics<list<LLVMType> args> {
|
||||
!listconcat([llvm_i32_ty /* coproc */, llvm_v16i8_ty /* acc */],
|
||||
args, [llvm_i32_ty /* imm */]),
|
||||
[IntrNoMem, ImmArg<0>, ImmArg<!add(!size(args), 2)>]>;
|
||||
|
||||
def _predicated : Intrinsic<
|
||||
[llvm_anyvector_ty],
|
||||
!listconcat([llvm_i32_ty /* coproc */, LLVMMatchType<0> /* inactive */],
|
||||
args, [llvm_i32_ty /* imm */, llvm_anyvector_ty /* mask */]),
|
||||
[IntrNoMem, ImmArg<0>, ImmArg<!add(!size(args), 2)>]>;
|
||||
def a_predicated : Intrinsic<
|
||||
[llvm_anyvector_ty],
|
||||
!listconcat([llvm_i32_ty /* coproc */, LLVMMatchType<0> /* acc */],
|
||||
args, [llvm_i32_ty /* imm */, llvm_anyvector_ty /* mask */]),
|
||||
[IntrNoMem, ImmArg<0>, ImmArg<!add(!size(args), 2)>]>;
|
||||
}
|
||||
|
||||
defm int_arm_cde_vcx1q : CDEVCXVecIntrinsics<[]>;
|
||||
|
@ -606,3 +606,61 @@ let Predicates = [HasCDE, HasMVEInt] in {
|
||||
(v16i8 (CDE_VCX3A_vec p_imm:$coproc, MQPR:$acc, MQPR:$n, MQPR:$m,
|
||||
imm_4b:$imm))>;
|
||||
}
|
||||
|
||||
// Selection patterns for the predicated Q-register CDE intrinsics
// (int_arm_cde_vcx{1,2,3}q[a]_predicated), instantiated once per MVE vector
// type description VTI.
//
// Each pattern maps an intrinsic call onto the corresponding CDE_VCX*_vec
// instruction, predicated with ARMVCCThen on $pred:
//   * the non-accumulating forms (VCX1/VCX2/VCX3) pass $inactive as the
//     trailing operand of the instruction;
//   * the accumulating forms (VCX1A/VCX2A/VCX3A) pass $acc as a regular
//     source operand right after the coprocessor number.
// The $n / $m vector sources are always typed v16i8, independent of VTI.
// On the result side the immediate is re-typed to the operand class of the
// instruction: imm_12b for VCX1*, imm_7b for VCX2*, imm_4b for VCX3*.
multiclass VCXPredicatedPat_m<MVEVectorVTInfo VTI> {
  // VCX1 / VCX1A: no vector sources, 12-bit immediate.
  def : Pat<(VTI.Vec (int_arm_cde_vcx1q_predicated timm:$coproc,
                          (VTI.Vec MQPR:$inactive), timm:$imm,
                          (VTI.Pred VCCR:$pred))),
            (VTI.Vec (CDE_VCX1_vec p_imm:$coproc, imm_12b:$imm, ARMVCCThen,
                          (VTI.Pred VCCR:$pred),
                          (VTI.Vec MQPR:$inactive)))>;
  def : Pat<(VTI.Vec (int_arm_cde_vcx1qa_predicated timm:$coproc,
                          (VTI.Vec MQPR:$acc), timm:$imm,
                          (VTI.Pred VCCR:$pred))),
            (VTI.Vec (CDE_VCX1A_vec p_imm:$coproc, (VTI.Vec MQPR:$acc),
                          imm_12b:$imm, ARMVCCThen,
                          (VTI.Pred VCCR:$pred)))>;

  // VCX2 / VCX2A: one vector source ($n), 7-bit immediate.
  def : Pat<(VTI.Vec (int_arm_cde_vcx2q_predicated timm:$coproc,
                          (VTI.Vec MQPR:$inactive),
                          (v16i8 MQPR:$n), timm:$imm,
                          (VTI.Pred VCCR:$pred))),
            (VTI.Vec (CDE_VCX2_vec p_imm:$coproc, (v16i8 MQPR:$n),
                          imm_7b:$imm, ARMVCCThen,
                          (VTI.Pred VCCR:$pred),
                          (VTI.Vec MQPR:$inactive)))>;
  // The result side uses imm_7b (not timm), matching the VCX2 pattern above
  // and the way every other pattern in this multiclass types its immediate.
  def : Pat<(VTI.Vec (int_arm_cde_vcx2qa_predicated timm:$coproc,
                          (VTI.Vec MQPR:$acc),
                          (v16i8 MQPR:$n), timm:$imm,
                          (VTI.Pred VCCR:$pred))),
            (VTI.Vec (CDE_VCX2A_vec p_imm:$coproc, (VTI.Vec MQPR:$acc),
                          (v16i8 MQPR:$n), imm_7b:$imm, ARMVCCThen,
                          (VTI.Pred VCCR:$pred)))>;

  // VCX3 / VCX3A: two vector sources ($n, $m), 4-bit immediate.
  def : Pat<(VTI.Vec (int_arm_cde_vcx3q_predicated timm:$coproc,
                          (VTI.Vec MQPR:$inactive),
                          (v16i8 MQPR:$n), (v16i8 MQPR:$m),
                          timm:$imm,
                          (VTI.Pred VCCR:$pred))),
            (VTI.Vec (CDE_VCX3_vec p_imm:$coproc, (v16i8 MQPR:$n),
                          (v16i8 MQPR:$m),
                          imm_4b:$imm, ARMVCCThen,
                          (VTI.Pred VCCR:$pred),
                          (VTI.Vec MQPR:$inactive)))>;
  def : Pat<(VTI.Vec (int_arm_cde_vcx3qa_predicated timm:$coproc,
                          (VTI.Vec MQPR:$acc),
                          (v16i8 MQPR:$n), (v16i8 MQPR:$m), timm:$imm,
                          (VTI.Pred VCCR:$pred))),
            (VTI.Vec (CDE_VCX3A_vec p_imm:$coproc, (VTI.Vec MQPR:$acc),
                          (v16i8 MQPR:$n), (v16i8 MQPR:$m),
                          imm_4b:$imm, ARMVCCThen,
                          (VTI.Pred VCCR:$pred)))>;
}
|
||||
|
||||
let Predicates = [HasCDE, HasMVEInt] in
|
||||
foreach VTI = [ MVE_v16i8, MVE_v8i16, MVE_v4i32, MVE_v2i64 ] in
|
||||
defm : VCXPredicatedPat_m<VTI>;
|
||||
|
||||
let Predicates = [HasCDE, HasMVEFloat] in
|
||||
foreach VTI = [ MVE_v8f16, MVE_v4f32 ] in
|
||||
defm : VCXPredicatedPat_m<VTI>;
|
||||
|
@ -1,5 +1,5 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+cdecp0 -mattr=+cdecp1 -mattr=+mve -verify-machineinstrs -o - %s | FileCheck %s
|
||||
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+cdecp0 -mattr=+cdecp1 -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
|
||||
|
||||
declare <16 x i8> @llvm.arm.cde.vcx1q(i32 immarg, i32 immarg)
|
||||
declare <16 x i8> @llvm.arm.cde.vcx1qa(i32 immarg, <16 x i8>, i32 immarg)
|
||||
@ -112,3 +112,103 @@ entry:
|
||||
%2 = call <16 x i8> @llvm.arm.cde.vcx3qa(i32 1, <16 x i8> %acc, <16 x i8> %0, <16 x i8> %1, i32 13)
|
||||
ret <16 x i8> %2
|
||||
}
|
||||
|
||||
declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
|
||||
declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
|
||||
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
|
||||
declare <8 x i16> @llvm.arm.cde.vcx1q.predicated.v8i16.v8i1(i32 immarg, <8 x i16>, i32 immarg, <8 x i1>)
|
||||
declare <16 x i8> @llvm.arm.cde.vcx1qa.predicated.v16i8.v16i1(i32 immarg, <16 x i8>, i32 immarg, <16 x i1>)
|
||||
declare <4 x i32> @llvm.arm.cde.vcx2q.predicated.v4i32.v4i1(i32 immarg, <4 x i32>, <16 x i8>, i32 immarg, <4 x i1>)
|
||||
declare <4 x float> @llvm.arm.cde.vcx2qa.predicated.v4f32.v4i1(i32 immarg, <4 x float>, <16 x i8>, i32 immarg, <4 x i1>)
|
||||
declare <2 x i64> @llvm.arm.cde.vcx3q.predicated.v2i64.v4i1(i32 immarg, <2 x i64>, <16 x i8>, <16 x i8>, i32 immarg, <4 x i1>)
|
||||
declare <4 x float> @llvm.arm.cde.vcx3qa.predicated.v4f32.v4i1(i32 immarg, <4 x float>, <16 x i8>, <16 x i8>, i32 immarg, <4 x i1>)
|
||||
|
||||
define arm_aapcs_vfpcc <8 x i16> @test_vcx1q_m(<8 x i16> %inactive, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vcx1q_m:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r0
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vcx1t p0, q0, #1111
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %p to i32
|
||||
%1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
|
||||
%2 = call <8 x i16> @llvm.arm.cde.vcx1q.predicated.v8i16.v8i1(i32 0, <8 x i16> %inactive, i32 1111, <8 x i1> %1)
|
||||
ret <8 x i16> %2
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <16 x i8> @test_vcx1qa_m(<16 x i8> %acc, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vcx1qa_m:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r0
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vcx1at p1, q0, #1112
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %p to i32
|
||||
%1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
|
||||
%2 = call <16 x i8> @llvm.arm.cde.vcx1qa.predicated.v16i8.v16i1(i32 1, <16 x i8> %acc, i32 1112, <16 x i1> %1)
|
||||
ret <16 x i8> %2
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @test_vcx2q_m(<4 x i32> %inactive, <4 x float> %n, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vcx2q_m:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r0
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vcx2t p0, q0, q1, #111
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = bitcast <4 x float> %n to <16 x i8>
|
||||
%1 = zext i16 %p to i32
|
||||
%2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
|
||||
%3 = call <4 x i32> @llvm.arm.cde.vcx2q.predicated.v4i32.v4i1(i32 0, <4 x i32> %inactive, <16 x i8> %0, i32 111, <4 x i1> %2)
|
||||
ret <4 x i32> %3
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x float> @test_vcx2qa_m(<4 x float> %acc, <8 x half> %n, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vcx2qa_m:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r0
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vcx2at p0, q0, q1, #112
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = bitcast <8 x half> %n to <16 x i8>
|
||||
%1 = zext i16 %p to i32
|
||||
%2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
|
||||
%3 = call <4 x float> @llvm.arm.cde.vcx2qa.predicated.v4f32.v4i1(i32 0, <4 x float> %acc, <16 x i8> %0, i32 112, <4 x i1> %2)
|
||||
ret <4 x float> %3
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <2 x i64> @test_vcx3q_m(<2 x i64> %inactive, <4 x float> %n, <16 x i8> %m, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vcx3q_m:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r0
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vcx3t p0, q0, q1, q2, #11
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = bitcast <4 x float> %n to <16 x i8>
|
||||
%1 = zext i16 %p to i32
|
||||
%2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
|
||||
%3 = call <2 x i64> @llvm.arm.cde.vcx3q.predicated.v2i64.v4i1(i32 0, <2 x i64> %inactive, <16 x i8> %0, <16 x i8> %m, i32 11, <4 x i1> %2)
|
||||
ret <2 x i64> %3
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <8 x half> @test_vcx3qa_m(<4 x float> %inactive, <8 x half> %n, <4 x i32> %m, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vcx3qa_m:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r0
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vcx3at p0, q0, q1, q2, #12
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = bitcast <8 x half> %n to <16 x i8>
|
||||
%1 = bitcast <4 x i32> %m to <16 x i8>
|
||||
%2 = zext i16 %p to i32
|
||||
%3 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %2)
|
||||
%4 = call <4 x float> @llvm.arm.cde.vcx3qa.predicated.v4f32.v4i1(i32 0, <4 x float> %inactive, <16 x i8> %0, <16 x i8> %1, i32 12, <4 x i1> %3)
|
||||
%5 = bitcast <4 x float> %4 to <8 x half>
|
||||
ret <8 x half> %5
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user